40#include "llvm/IR/IntrinsicsAMDGPU.h"
41#include "llvm/IR/IntrinsicsR600.h"
49#define DEBUG_TYPE "si-lower"
54 "amdgpu-disable-loop-alignment",
55 cl::desc(
"Do not align and prefetch loops"),
59 "amdgpu-use-divergent-register-indexing",
61 cl::desc(
"Use indirect register addressing for divergent indexes"),
75 unsigned NumSGPRs = AMDGPU::SGPR_32RegClass.getNumRegs();
76 for (
unsigned Reg = 0; Reg < NumSGPRs; ++
Reg) {
78 return AMDGPU::SGPR0 +
Reg;
194 {MVT::v2i32, MVT::v3i32, MVT::v4i32, MVT::v5i32,
195 MVT::v6i32, MVT::v7i32, MVT::v8i32, MVT::v9i32,
196 MVT::v10i32, MVT::v11i32, MVT::v12i32, MVT::v16i32,
197 MVT::i1, MVT::v32i32},
201 {MVT::v2i32, MVT::v3i32, MVT::v4i32, MVT::v5i32,
202 MVT::v6i32, MVT::v7i32, MVT::v8i32, MVT::v9i32,
203 MVT::v10i32, MVT::v11i32, MVT::v12i32, MVT::v16i32,
204 MVT::i1, MVT::v32i32},
211 ISD::FMINIMUM, ISD::FMAXIMUM, ISD::FSQRT, ISD::FCBRT,
212 ISD::FSIN, ISD::FCOS, ISD::FPOW, ISD::FPOWI,
213 ISD::FLDEXP, ISD::FFREXP, ISD::FLOG, ISD::FLOG2,
214 ISD::FLOG10, ISD::FEXP, ISD::FEXP2, ISD::FEXP10,
215 ISD::FCEIL, ISD::FTRUNC, ISD::FRINT, ISD::FNEARBYINT,
273 {MVT::f32, MVT::i32, MVT::i64, MVT::f64, MVT::i1},
Expand);
280 {MVT::v2i32, MVT::v3i32, MVT::v4i32, MVT::v5i32,
281 MVT::v6i32, MVT::v7i32, MVT::v8i32, MVT::v9i32,
282 MVT::v10i32, MVT::v11i32, MVT::v12i32, MVT::v16i32},
285 {MVT::v2f32, MVT::v3f32, MVT::v4f32, MVT::v5f32,
286 MVT::v6f32, MVT::v7f32, MVT::v8f32, MVT::v9f32,
287 MVT::v10f32, MVT::v11f32, MVT::v12f32, MVT::v16f32},
291 {MVT::v2i1, MVT::v4i1, MVT::v2i8, MVT::v4i8, MVT::v2i16,
292 MVT::v3i16, MVT::v4i16, MVT::Other},
297 {MVT::i1, MVT::i32, MVT::i64, MVT::f32, MVT::f64},
Expand);
313 {MVT::v8i32, MVT::v8f32, MVT::v9i32, MVT::v9f32, MVT::v10i32,
314 MVT::v10f32, MVT::v11i32, MVT::v11f32, MVT::v12i32, MVT::v12f32,
315 MVT::v16i32, MVT::v16f32, MVT::v2i64, MVT::v2f64, MVT::v4i16,
316 MVT::v4f16, MVT::v4bf16, MVT::v3i64, MVT::v3f64, MVT::v6i32,
317 MVT::v6f32, MVT::v4i64, MVT::v4f64, MVT::v8i64, MVT::v8f64,
318 MVT::v8i16, MVT::v8f16, MVT::v8bf16, MVT::v16i16, MVT::v16f16,
319 MVT::v16bf16, MVT::v16i64, MVT::v16f64, MVT::v32i32, MVT::v32f32,
320 MVT::v32i16, MVT::v32f16, MVT::v32bf16}) {
352 for (
MVT Vec64 : { MVT::v2i64, MVT::v2f64 }) {
366 for (
MVT Vec64 : { MVT::v3i64, MVT::v3f64 }) {
380 for (
MVT Vec64 : { MVT::v4i64, MVT::v4f64 }) {
394 for (
MVT Vec64 : { MVT::v8i64, MVT::v8f64 }) {
408 for (
MVT Vec64 : { MVT::v16i64, MVT::v16f64 }) {
423 {MVT::v8i32, MVT::v8f32, MVT::v16i32, MVT::v16f32},
432 {MVT::v2i16, MVT::v2f16, MVT::v2bf16, MVT::v2i8, MVT::v4i8,
433 MVT::v8i8, MVT::v4i16, MVT::v4f16, MVT::v4bf16},
438 {MVT::v3i32, MVT::v3f32, MVT::v4i32, MVT::v4f32},
Custom);
442 {MVT::v5i32, MVT::v5f32, MVT::v6i32, MVT::v6f32,
443 MVT::v7i32, MVT::v7f32, MVT::v8i32, MVT::v8f32,
444 MVT::v9i32, MVT::v9f32, MVT::v10i32, MVT::v10f32,
445 MVT::v11i32, MVT::v11f32, MVT::v12i32, MVT::v12f32},
526 {MVT::f32, MVT::f64},
Legal);
619 {MVT::v2i16, MVT::v2f16, MVT::v2bf16, MVT::v4i16, MVT::v4f16,
620 MVT::v4bf16, MVT::v8i16, MVT::v8f16, MVT::v8bf16, MVT::v16i16,
621 MVT::v16f16, MVT::v16bf16, MVT::v32i16, MVT::v32f16}) {
756 {MVT::v4f16, MVT::v8f16, MVT::v16f16, MVT::v32f16},
760 {MVT::v4f16, MVT::v8f16, MVT::v16f16, MVT::v32f16},
764 {MVT::v8i16, MVT::v8f16, MVT::v8bf16, MVT::v16i16, MVT::v16f16,
765 MVT::v16bf16, MVT::v32i16, MVT::v32f16, MVT::v32bf16}) {
787 {MVT::v4f16, MVT::v4i16, MVT::v8f16, MVT::v8i16,
788 MVT::v16f16, MVT::v16i16, MVT::v32f16, MVT::v32i16},
791 for (
MVT VT : {MVT::v4i16, MVT::v8i16, MVT::v16i16, MVT::v32i16})
799 for (
MVT VT : {MVT::v4f16, MVT::v8f16, MVT::v16f16, MVT::v32f16})
815 {MVT::v4f32, MVT::v8f32, MVT::v16f32, MVT::v32f32},
835 {MVT::v4i16, MVT::v4f16, MVT::v4bf16, MVT::v2i8, MVT::v4i8,
836 MVT::v8i8, MVT::v8i16, MVT::v8f16, MVT::v8bf16,
837 MVT::v16i16, MVT::v16f16, MVT::v16bf16, MVT::v32i16,
838 MVT::v32f16, MVT::v32bf16},
854 {MVT::f16, MVT::f32, MVT::f64, MVT::v2f16},
Legal);
856 {MVT::v4f16, MVT::v8f16, MVT::v16f16, MVT::v32f16},
861 {MVT::Other, MVT::f32, MVT::v4f32, MVT::i16, MVT::f16,
862 MVT::bf16, MVT::v2i16, MVT::v2f16, MVT::v2bf16, MVT::i128,
867 {MVT::v2f16, MVT::v2i16, MVT::v2bf16, MVT::v3f16,
868 MVT::v3i16, MVT::v4f16, MVT::v4i16, MVT::v4bf16,
869 MVT::v8i16, MVT::v8f16, MVT::v8bf16, MVT::Other, MVT::f16,
870 MVT::i16, MVT::bf16, MVT::i8, MVT::i128},
874 {MVT::Other, MVT::v2i16, MVT::v2f16, MVT::v2bf16,
875 MVT::v3i16, MVT::v3f16, MVT::v4f16, MVT::v4i16,
876 MVT::v4bf16, MVT::v8i16, MVT::v8f16, MVT::v8bf16,
877 MVT::f16, MVT::i16, MVT::bf16, MVT::i8, MVT::i128},
934 ISD::ATOMIC_CMP_SWAP,
935 ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS,
937 ISD::ATOMIC_LOAD_ADD,
938 ISD::ATOMIC_LOAD_SUB,
939 ISD::ATOMIC_LOAD_AND,
941 ISD::ATOMIC_LOAD_XOR,
942 ISD::ATOMIC_LOAD_NAND,
943 ISD::ATOMIC_LOAD_MIN,
944 ISD::ATOMIC_LOAD_MAX,
945 ISD::ATOMIC_LOAD_UMIN,
946 ISD::ATOMIC_LOAD_UMAX,
947 ISD::ATOMIC_LOAD_FADD,
948 ISD::ATOMIC_LOAD_FMIN,
949 ISD::ATOMIC_LOAD_FMAX,
950 ISD::ATOMIC_LOAD_UINC_WRAP,
951 ISD::ATOMIC_LOAD_UDEC_WRAP,
966 static const MCPhysReg RCRegs[] = {AMDGPU::MODE};
979 EVT DestVT,
EVT SrcVT)
const {
989 LLT DestTy,
LLT SrcTy)
const {
990 return ((Opcode == TargetOpcode::G_FMAD && Subtarget->
hasMadMixInsts()) ||
991 (Opcode == TargetOpcode::G_FMA && Subtarget->
hasFmaMixInsts())) &&
993 SrcTy.getScalarSizeInBits() == 16 &&
1017 return (ScalarVT == MVT::bf16 ? MVT::i32 : MVT::v2f16);
1019 return VT.
isInteger() ? MVT::i32 : MVT::f32;
1046 return (NumElts + 1) / 2;
1052 return NumElts * ((
Size + 31) / 32);
1061 EVT VT,
EVT &IntermediateVT,
1062 unsigned &NumIntermediates,
MVT &RegisterVT)
const {
1071 if (ScalarVT == MVT::bf16) {
1072 RegisterVT = MVT::i32;
1073 IntermediateVT = MVT::v2bf16;
1075 RegisterVT = VT.
isInteger() ? MVT::v2i16 : MVT::v2f16;
1076 IntermediateVT = RegisterVT;
1078 NumIntermediates = (NumElts + 1) / 2;
1079 return NumIntermediates;
1084 IntermediateVT = RegisterVT;
1085 NumIntermediates = NumElts;
1086 return NumIntermediates;
1091 RegisterVT = MVT::i16;
1092 IntermediateVT = ScalarVT;
1093 NumIntermediates = NumElts;
1094 return NumIntermediates;
1099 RegisterVT = MVT::i32;
1100 IntermediateVT = ScalarVT;
1101 NumIntermediates = NumElts;
1102 return NumIntermediates;
1106 RegisterVT = MVT::i32;
1107 IntermediateVT = RegisterVT;
1108 NumIntermediates = NumElts * ((
Size + 31) / 32);
1109 return NumIntermediates;
1114 Context,
CC, VT, IntermediateVT, NumIntermediates, RegisterVT);
1119 unsigned MaxNumLanes) {
1120 assert(MaxNumLanes != 0);
1124 unsigned NumElts = std::min(MaxNumLanes, VT->getNumElements());
1135 unsigned MaxNumLanes) {
1141 assert(ST->getNumContainedTypes() == 2 &&
1142 ST->getContainedType(1)->isIntegerTy(32));
1157 DL.getPointerSizeInBits(AS) == 192)
1167 DL.getPointerSizeInBits(AS) == 160) ||
1169 DL.getPointerSizeInBits(AS) == 192))
1177 unsigned IntrID)
const {
1179 if (CI.
hasMetadata(LLVMContext::MD_invariant_load))
1194 if (RsrcIntr->IsImage) {
1209 Info.ptrVal = RsrcArg;
1217 if (RsrcIntr->IsImage) {
1218 unsigned MaxNumLanes = 4;
1233 std::numeric_limits<unsigned>::max());
1243 if (RsrcIntr->IsImage) {
1262 if (RsrcIntr->IsImage && BaseOpcode->
NoReturn) {
1264 Info.memVT = MVT::i32;
1271 case Intrinsic::amdgcn_raw_buffer_load_lds:
1272 case Intrinsic::amdgcn_raw_ptr_buffer_load_lds:
1273 case Intrinsic::amdgcn_struct_buffer_load_lds:
1274 case Intrinsic::amdgcn_struct_ptr_buffer_load_lds: {
1280 case Intrinsic::amdgcn_raw_atomic_buffer_load:
1281 case Intrinsic::amdgcn_raw_ptr_atomic_buffer_load: {
1284 std::numeric_limits<unsigned>::max());
1294 case Intrinsic::amdgcn_ds_ordered_add:
1295 case Intrinsic::amdgcn_ds_ordered_swap: {
1308 case Intrinsic::amdgcn_ds_add_gs_reg_rtn:
1309 case Intrinsic::amdgcn_ds_sub_gs_reg_rtn: {
1312 Info.ptrVal =
nullptr;
1317 case Intrinsic::amdgcn_ds_append:
1318 case Intrinsic::amdgcn_ds_consume: {
1331 case Intrinsic::amdgcn_global_atomic_csub: {
1341 case Intrinsic::amdgcn_image_bvh_intersect_ray: {
1351 case Intrinsic::amdgcn_global_atomic_fadd:
1352 case Intrinsic::amdgcn_global_atomic_fmin:
1353 case Intrinsic::amdgcn_global_atomic_fmax:
1354 case Intrinsic::amdgcn_global_atomic_fmin_num:
1355 case Intrinsic::amdgcn_global_atomic_fmax_num:
1356 case Intrinsic::amdgcn_global_atomic_ordered_add_b64:
1357 case Intrinsic::amdgcn_flat_atomic_fadd:
1358 case Intrinsic::amdgcn_flat_atomic_fmin:
1359 case Intrinsic::amdgcn_flat_atomic_fmax:
1360 case Intrinsic::amdgcn_flat_atomic_fmin_num:
1361 case Intrinsic::amdgcn_flat_atomic_fmax_num:
1362 case Intrinsic::amdgcn_global_atomic_fadd_v2bf16:
1363 case Intrinsic::amdgcn_atomic_cond_sub_u32:
1364 case Intrinsic::amdgcn_flat_atomic_fadd_v2bf16: {
1375 case Intrinsic::amdgcn_global_load_tr_b64:
1376 case Intrinsic::amdgcn_global_load_tr_b128: {
1384 case Intrinsic::amdgcn_ds_gws_init:
1385 case Intrinsic::amdgcn_ds_gws_barrier:
1386 case Intrinsic::amdgcn_ds_gws_sema_v:
1387 case Intrinsic::amdgcn_ds_gws_sema_br:
1388 case Intrinsic::amdgcn_ds_gws_sema_p:
1389 case Intrinsic::amdgcn_ds_gws_sema_release_all: {
1399 Info.memVT = MVT::i32;
1401 Info.align =
Align(4);
1403 if (IntrID == Intrinsic::amdgcn_ds_gws_barrier)
1409 case Intrinsic::amdgcn_global_load_lds: {
1417 case Intrinsic::amdgcn_ds_bvh_stack_rtn: {
1427 Info.memVT = MVT::i32;
1429 Info.align =
Align(4);
1442 case Intrinsic::amdgcn_addrspacecast_nonnull: {
1445 unsigned SrcAS =
I.getOperand(0)->getType()->getPointerAddressSpace();
1446 unsigned DstAS =
I.getType()->getPointerAddressSpace();
1458 Type *&AccessTy)
const {
1460 switch (
II->getIntrinsicID()) {
1461 case Intrinsic::amdgcn_atomic_cond_sub_u32:
1462 case Intrinsic::amdgcn_ds_append:
1463 case Intrinsic::amdgcn_ds_consume:
1464 case Intrinsic::amdgcn_ds_ordered_add:
1465 case Intrinsic::amdgcn_ds_ordered_swap:
1466 case Intrinsic::amdgcn_flat_atomic_fadd:
1467 case Intrinsic::amdgcn_flat_atomic_fadd_v2bf16:
1468 case Intrinsic::amdgcn_flat_atomic_fmax:
1469 case Intrinsic::amdgcn_flat_atomic_fmax_num:
1470 case Intrinsic::amdgcn_flat_atomic_fmin:
1471 case Intrinsic::amdgcn_flat_atomic_fmin_num:
1472 case Intrinsic::amdgcn_global_atomic_csub:
1473 case Intrinsic::amdgcn_global_atomic_fadd:
1474 case Intrinsic::amdgcn_global_atomic_fadd_v2bf16:
1475 case Intrinsic::amdgcn_global_atomic_fmax:
1476 case Intrinsic::amdgcn_global_atomic_fmax_num:
1477 case Intrinsic::amdgcn_global_atomic_fmin:
1478 case Intrinsic::amdgcn_global_atomic_fmin_num:
1479 case Intrinsic::amdgcn_global_atomic_ordered_add_b64:
1480 case Intrinsic::amdgcn_global_load_tr_b64:
1481 case Intrinsic::amdgcn_global_load_tr_b128:
1482 Ptr =
II->getArgOperand(0);
1484 case Intrinsic::amdgcn_global_load_lds:
1485 Ptr =
II->getArgOperand(1);
1490 AccessTy =
II->getType();
1496 unsigned AddrSpace)
const {
1508 return AM.
Scale == 0 &&
1510 AM.
BaseOffs, AddrSpace, FlatVariant));
1530 return isLegalMUBUFAddressingMode(AM);
1533bool SITargetLowering::isLegalMUBUFAddressingMode(
const AddrMode &AM)
const {
1544 if (!
TII->isLegalMUBUFImmOffset(AM.BaseOffs))
1556 if (AM.HasBaseReg) {
1587 return isLegalMUBUFAddressingMode(AM);
1594 if (Ty->isSized() &&
DL.getTypeStoreSize(Ty) < 4)
1644 : isLegalMUBUFAddressingMode(AM);
1691 unsigned Size,
unsigned AddrSpace,
Align Alignment,
1705 Alignment < RequiredAlignment)
1726 RequiredAlignment =
Align(4);
1744 *IsFast = (Alignment >= RequiredAlignment) ? 64
1745 : (Alignment <
Align(4)) ? 32
1767 *IsFast = (Alignment >= RequiredAlignment) ? 96
1768 : (Alignment <
Align(4)) ? 32
1781 RequiredAlignment =
Align(8);
1792 *IsFast = (Alignment >= RequiredAlignment) ? 128
1793 : (Alignment <
Align(4)) ? 32
1810 *IsFast = (Alignment >= RequiredAlignment) ?
Size : 0;
1812 return Alignment >= RequiredAlignment ||
1817 bool AlignedBy4 = Alignment >=
Align(4);
1819 *IsFast = AlignedBy4;
1821 return AlignedBy4 ||
1831 bool AlignedBy4 = Alignment >=
Align(4);
1833 *IsFast = AlignedBy4;
1844 return Alignment >=
Align(4) ||
1858 return Size >= 32 && Alignment >=
Align(4);
1863 unsigned *IsFast)
const {
1865 Alignment, Flags, IsFast);
1875 if (
Op.size() >= 16 &&
1879 if (
Op.size() >= 8 &&
Op.isDstAligned(
Align(4)))
1897 unsigned DestAS)
const {
1905 return TM.isNoopAddrSpaceCast(SrcAS, DestAS);
1929 unsigned Index)
const {
1976 std::tie(InputPtrReg, RC, ArgTy) =
1986 MRI.getLiveInVirtReg(InputPtrReg->getRegister()), PtrVT);
1992 const SDLoc &SL)
const {
1999 const SDLoc &SL)
const {
2002 std::optional<uint32_t> KnownSize =
2004 if (KnownSize.has_value())
2031 Val = getFPExtOrFPRound(DAG, Val, SL, VT);
2040SDValue SITargetLowering::lowerKernargMemParameter(
2052 int64_t OffsetDiff =
Offset - AlignDownOffset;
2058 SDValue Ptr = lowerKernArgParameterPtr(DAG, SL, Chain, AlignDownOffset);
2067 ArgVal = DAG.
getNode(ISD::BITCAST, SL, MemVT, ArgVal);
2068 ArgVal = convertArgType(DAG, VT, MemVT, SL, ArgVal,
Signed, Arg);
2079 SDValue Val = convertArgType(DAG, VT, MemVT, SL, Load,
Signed, Arg);
2126 ExtType, SL, VA.
getLocVT(), Chain, FIN,
2155 Reg = &WorkGroupIDX;
2156 RC = &AMDGPU::SReg_32RegClass;
2160 Reg = &WorkGroupIDY;
2161 RC = &AMDGPU::SReg_32RegClass;
2165 Reg = &WorkGroupIDZ;
2166 RC = &AMDGPU::SReg_32RegClass;
2193 CallingConv::ID CallConv,
2197 for (
unsigned I = 0, E = Ins.size(), PSInputNum = 0;
I != E; ++
I) {
2201 "vector type argument should have been split");
2206 bool SkipArg = !Arg->
Used && !Info->isPSInputAllocated(PSInputNum);
2215 "unexpected vector split in ps argument type");
2229 Info->markPSInputAllocated(PSInputNum);
2231 Info->markPSInputEnabled(PSInputNum);
2248 if (Info.hasWorkItemIDX()) {
2254 Info.hasWorkItemIDY()) ? 0x3ff : ~0u;
2258 if (Info.hasWorkItemIDY()) {
2259 assert(Info.hasWorkItemIDX());
2264 unsigned Reg = AMDGPU::VGPR1;
2272 if (Info.hasWorkItemIDZ()) {
2273 assert(Info.hasWorkItemIDX() && Info.hasWorkItemIDY());
2278 unsigned Reg = AMDGPU::VGPR2;
2298 if (RegIdx == ArgVGPRs.
size()) {
2305 unsigned Reg = ArgVGPRs[RegIdx];
2307 assert(Reg != AMDGPU::NoRegister);
2317 unsigned NumArgRegs) {
2320 if (RegIdx == ArgSGPRs.
size())
2323 unsigned Reg = ArgSGPRs[RegIdx];
2325 assert(Reg != AMDGPU::NoRegister);
2339 assert(Reg != AMDGPU::NoRegister);
2365 const unsigned Mask = 0x3ff;
2368 if (Info.hasWorkItemIDX()) {
2370 Info.setWorkItemIDX(Arg);
2373 if (Info.hasWorkItemIDY()) {
2375 Info.setWorkItemIDY(Arg);
2378 if (Info.hasWorkItemIDZ())
2390 const unsigned Mask = 0x3ff;
2401 auto &
ArgInfo = Info.getArgInfo();
2415 if (Info.hasImplicitArgPtr())
2423 if (Info.hasWorkGroupIDX())
2426 if (Info.hasWorkGroupIDY())
2429 if (Info.hasWorkGroupIDZ())
2432 if (Info.hasLDSKernelId())
2443 Register ImplicitBufferPtrReg = Info.addImplicitBufferPtr(
TRI);
2444 MF.
addLiveIn(ImplicitBufferPtrReg, &AMDGPU::SGPR_64RegClass);
2450 Register PrivateSegmentBufferReg = Info.addPrivateSegmentBuffer(
TRI);
2451 MF.
addLiveIn(PrivateSegmentBufferReg, &AMDGPU::SGPR_128RegClass);
2456 Register DispatchPtrReg = Info.addDispatchPtr(
TRI);
2457 MF.
addLiveIn(DispatchPtrReg, &AMDGPU::SGPR_64RegClass);
2465 MF.
addLiveIn(QueuePtrReg, &AMDGPU::SGPR_64RegClass);
2471 Register InputPtrReg = Info.addKernargSegmentPtr(
TRI);
2480 MF.
addLiveIn(DispatchIDReg, &AMDGPU::SGPR_64RegClass);
2485 Register FlatScratchInitReg = Info.addFlatScratchInit(
TRI);
2486 MF.
addLiveIn(FlatScratchInitReg, &AMDGPU::SGPR_64RegClass);
2491 Register PrivateSegmentSizeReg = Info.addPrivateSegmentSize(
TRI);
2492 MF.
addLiveIn(PrivateSegmentSizeReg, &AMDGPU::SGPR_32RegClass);
2507 unsigned LastExplicitArgOffset =
2510 bool InPreloadSequence =
true;
2512 for (
auto &Arg :
F.args()) {
2513 if (!InPreloadSequence || !Arg.hasInRegAttr())
2516 int ArgIdx = Arg.getArgNo();
2519 if (InIdx < Ins.size() && (!Ins[InIdx].isOrigArg() ||
2520 (
int)Ins[InIdx].getOrigArgIndex() != ArgIdx))
2523 for (; InIdx < Ins.size() && Ins[InIdx].isOrigArg() &&
2524 (
int)Ins[InIdx].getOrigArgIndex() == ArgIdx;
2526 assert(ArgLocs[ArgIdx].isMemLoc());
2527 auto &ArgLoc = ArgLocs[InIdx];
2529 unsigned ArgOffset = ArgLoc.getLocMemOffset();
2531 unsigned NumAllocSGPRs =
2532 alignTo(ArgLoc.getLocVT().getFixedSizeInBits(), 32) / 32;
2535 if (ArgLoc.getLocVT().getStoreSize() < 4 && Alignment < 4) {
2536 Info.getArgInfo().PreloadKernArgs[InIdx].Regs.push_back(
2537 Info.getArgInfo().PreloadKernArgs[InIdx - 1].Regs[0]);
2541 unsigned Padding = ArgOffset - LastExplicitArgOffset;
2542 unsigned PaddingSGPRs =
alignTo(Padding, 4) / 4;
2544 if (PaddingSGPRs + NumAllocSGPRs + 1 >
2546 InPreloadSequence =
false;
2552 TRI.getSGPRClassForBitWidth(NumAllocSGPRs * 32);
2554 Info.addPreloadedKernArg(
TRI, RC, NumAllocSGPRs, InIdx, PaddingSGPRs);
2556 if (PreloadRegs->
size() > 1)
2557 RC = &AMDGPU::SGPR_32RegClass;
2558 for (
auto &Reg : *PreloadRegs) {
2564 LastExplicitArgOffset = NumAllocSGPRs * 4 + ArgOffset;
2573 if (Info.hasLDSKernelId()) {
2574 Register Reg = Info.addLDSKernelId();
2575 MF.
addLiveIn(Reg, &AMDGPU::SGPR_32RegClass);
2584 CallingConv::ID CallConv,
2585 bool IsShader)
const {
2593 assert(!HasArchitectedSGPRs &&
"Unhandled feature for the subtarget");
2595 unsigned CurrentUserSGPRs = Info.getNumUserSGPRs();
2599 unsigned NumRequiredSystemSGPRs = Info.hasWorkGroupIDX() +
2600 Info.hasWorkGroupIDY() +
2601 Info.hasWorkGroupIDZ() +
2602 Info.hasWorkGroupInfo();
2603 for (
unsigned i = NumRequiredSystemSGPRs + CurrentUserSGPRs; i < 16; ++i) {
2604 Register Reg = Info.addReservedUserSGPR();
2605 MF.
addLiveIn(Reg, &AMDGPU::SGPR_32RegClass);
2610 if (!HasArchitectedSGPRs) {
2611 if (Info.hasWorkGroupIDX()) {
2612 Register Reg = Info.addWorkGroupIDX();
2613 MF.
addLiveIn(Reg, &AMDGPU::SGPR_32RegClass);
2617 if (Info.hasWorkGroupIDY()) {
2618 Register Reg = Info.addWorkGroupIDY();
2619 MF.
addLiveIn(Reg, &AMDGPU::SGPR_32RegClass);
2623 if (Info.hasWorkGroupIDZ()) {
2624 Register Reg = Info.addWorkGroupIDZ();
2625 MF.
addLiveIn(Reg, &AMDGPU::SGPR_32RegClass);
2630 if (Info.hasWorkGroupInfo()) {
2631 Register Reg = Info.addWorkGroupInfo();
2632 MF.
addLiveIn(Reg, &AMDGPU::SGPR_32RegClass);
2636 if (Info.hasPrivateSegmentWaveByteOffset()) {
2638 unsigned PrivateSegmentWaveByteOffsetReg;
2641 PrivateSegmentWaveByteOffsetReg =
2642 Info.getPrivateSegmentWaveByteOffsetSystemSGPR();
2646 if (PrivateSegmentWaveByteOffsetReg == AMDGPU::NoRegister) {
2648 Info.setPrivateSegmentWaveByteOffset(PrivateSegmentWaveByteOffsetReg);
2651 PrivateSegmentWaveByteOffsetReg = Info.addPrivateSegmentWaveByteOffset();
2653 MF.
addLiveIn(PrivateSegmentWaveByteOffsetReg, &AMDGPU::SGPR_32RegClass);
2654 CCInfo.
AllocateReg(PrivateSegmentWaveByteOffsetReg);
2658 Info.getNumPreloadedSGPRs() >= 16);
2673 if (HasStackObjects)
2674 Info.setHasNonSpillStackObjects(
true);
2679 HasStackObjects =
true;
2683 bool RequiresStackAccess = HasStackObjects || MFI.
hasCalls();
2685 if (!ST.enableFlatScratch()) {
2686 if (RequiresStackAccess && ST.isAmdHsaOrMesa(MF.
getFunction())) {
2693 Info.setScratchRSrcReg(PrivateSegmentBufferReg);
2695 unsigned ReservedBufferReg =
TRI.reservedPrivateSegmentBufferReg(MF);
2705 Info.setScratchRSrcReg(ReservedBufferReg);
2724 if (!
MRI.isLiveIn(AMDGPU::SGPR32)) {
2725 Info.setStackPtrOffsetReg(AMDGPU::SGPR32);
2732 for (
unsigned Reg : AMDGPU::SGPR_32RegClass) {
2733 if (!
MRI.isLiveIn(Reg)) {
2734 Info.setStackPtrOffsetReg(Reg);
2739 if (Info.getStackPtrOffsetReg() == AMDGPU::SP_REG)
2746 if (ST.getFrameLowering()->hasFP(MF)) {
2747 Info.setFrameOffsetReg(AMDGPU::SGPR33);
2765 const MCPhysReg *IStart =
TRI->getCalleeSavedRegsViaCopy(Entry->getParent());
2774 if (AMDGPU::SReg_64RegClass.
contains(*
I))
2775 RC = &AMDGPU::SGPR_64RegClass;
2776 else if (AMDGPU::SReg_32RegClass.
contains(*
I))
2777 RC = &AMDGPU::SGPR_32RegClass;
2783 Entry->addLiveIn(*
I);
2788 for (
auto *Exit : Exits)
2790 TII->get(TargetOpcode::COPY), *
I)
2796 SDValue Chain, CallingConv::ID CallConv,
bool isVarArg,
2808 Fn,
"unsupported non-compute shaders with HSA",
DL.getDebugLoc());
2827 !Info->hasLDSKernelId() && !Info->hasWorkItemIDX() &&
2828 !Info->hasWorkItemIDY() && !Info->hasWorkItemIDZ());
2835 assert(!Info->hasWorkGroupIDX() && !Info->hasWorkGroupIDY() &&
2836 !Info->hasWorkGroupIDZ());
2855 if ((Info->getPSInputAddr() & 0x7F) == 0 ||
2856 ((Info->getPSInputAddr() & 0xF) == 0 && Info->isPSInputAllocated(11))) {
2859 Info->markPSInputAllocated(0);
2860 Info->markPSInputEnabled(0);
2871 unsigned PsInputBits = Info->getPSInputAddr() & Info->getPSInputEnable();
2872 if ((PsInputBits & 0x7F) == 0 ||
2873 ((PsInputBits & 0xF) == 0 && (PsInputBits >> 11 & 1)))
2876 }
else if (IsKernel) {
2877 assert(Info->hasWorkGroupIDX() && Info->hasWorkItemIDX());
2879 Splits.
append(Ins.begin(), Ins.end());
2892 }
else if (!IsGraphics) {
2917 for (
unsigned i = 0, e = Ins.size(), ArgIdx = 0; i != e; ++i) {
2927 if (IsEntryFunc && VA.
isMemLoc()) {
2950 if (Arg.
isOrigArg() && Info->getArgInfo().PreloadKernArgs.count(i)) {
2954 int64_t OffsetDiff =
Offset - AlignDownOffset;
2961 Info->getArgInfo().PreloadKernArgs.find(i)->getSecond().Regs[0];
2971 ArgVal = DAG.
getNode(ISD::BITCAST,
DL, MemVT, ArgVal);
2972 NewArg = convertArgType(DAG, VT, MemVT,
DL, ArgVal,
2973 Ins[i].Flags.isSExt(), &Ins[i]);
2981 Info->getArgInfo().PreloadKernArgs.find(i)->getSecond().Regs;
2984 if (PreloadRegs.
size() == 1) {
2985 Register VReg =
MRI.getLiveInVirtReg(PreloadRegs[0]);
2990 TRI->getRegSizeInBits(*RC)));
2998 for (
auto Reg : PreloadRegs) {
3005 PreloadRegs.size()),
3022 NewArg = convertArgType(DAG, VT, MemVT,
DL, NewArg,
3023 Ins[i].Flags.isSExt(), &Ins[i]);
3028 lowerKernargMemParameter(DAG, VT, MemVT,
DL, Chain,
Offset,
3029 Alignment, Ins[i].Flags.isSExt(), &Ins[i]);
3048 if (!IsEntryFunc && VA.
isMemLoc()) {
3049 SDValue Val = lowerStackParameter(DAG, VA,
DL, Chain, Arg);
3060 if (AMDGPU::VGPR_32RegClass.
contains(Reg))
3061 RC = &AMDGPU::VGPR_32RegClass;
3062 else if (AMDGPU::SGPR_32RegClass.
contains(Reg))
3063 RC = &AMDGPU::SGPR_32RegClass;
3090 Val = DAG.
getNode(ISD::BITCAST,
DL, ValVT, Val);
3124 Info->setBytesInStackArgArea(StackArgSize);
3126 return Chains.
empty() ? Chain :
3133 CallingConv::ID CallConv,
3144 CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
3150 unsigned TotalNumVGPRs = AMDGPU::VGPR_32RegClass.getNumRegs();
3151 for (
unsigned i = MaxNumVGPRs; i < TotalNumVGPRs; ++i)
3152 if (CCInfo.
isAllocated(AMDGPU::VGPR_32RegClass.getRegister(i)))
3174 Info->setIfReturnsVoid(Outs.
empty());
3175 bool IsWaveEnd = Info->returnsVoid() && IsShader;
3193 for (
unsigned I = 0, RealRVLocIdx = 0, E = RVLocs.
size();
I != E;
3194 ++
I, ++RealRVLocIdx) {
3198 SDValue Arg = OutVals[RealRVLocIdx];
3226 if (!Info->isEntryFunction()) {
3232 if (AMDGPU::SReg_64RegClass.
contains(*
I))
3234 else if (AMDGPU::SReg_32RegClass.
contains(*
I))
3250 return DAG.
getNode(Opc,
DL, MVT::Other, RetOps);
3254 SDValue Chain,
SDValue InGlue, CallingConv::ID CallConv,
bool IsVarArg,
3335 auto &ArgUsageInfo =
3337 CalleeArgInfo = &ArgUsageInfo.lookupFuncArgInfo(*CalleeFunc);
3367 std::tie(OutgoingArg, ArgRC, ArgTy) =
3375 std::tie(IncomingArg, IncomingArgRC, Ty) =
3377 assert(IncomingArgRC == ArgRC);
3380 EVT ArgVT =
TRI->getSpillSize(*ArgRC) == 8 ? MVT::i64 : MVT::i32;
3388 InputReg = getImplicitArgPtr(DAG,
DL);
3390 std::optional<uint32_t> Id =
3392 if (Id.has_value()) {
3404 RegsToPass.emplace_back(OutgoingArg->
getRegister(), InputReg);
3408 unsigned SpecialArgOffset =
3422 std::tie(OutgoingArg, ArgRC, Ty) =
3425 std::tie(OutgoingArg, ArgRC, Ty) =
3428 std::tie(OutgoingArg, ArgRC, Ty) =
3443 const bool NeedWorkItemIDX = !CLI.
CB->
hasFnAttr(
"amdgpu-no-workitem-id-x");
3444 const bool NeedWorkItemIDY = !CLI.
CB->
hasFnAttr(
"amdgpu-no-workitem-id-y");
3445 const bool NeedWorkItemIDZ = !CLI.
CB->
hasFnAttr(
"amdgpu-no-workitem-id-z");
3462 InputReg = InputReg.
getNode() ?
3471 InputReg = InputReg.
getNode() ?
3475 if (!InputReg && (NeedWorkItemIDX || NeedWorkItemIDY || NeedWorkItemIDZ)) {
3476 if (!IncomingArgX && !IncomingArgY && !IncomingArgZ) {
3486 IncomingArgX ? *IncomingArgX :
3487 IncomingArgY ? *IncomingArgY :
3488 *IncomingArgZ, ~0u);
3495 RegsToPass.emplace_back(OutgoingArg->
getRegister(), InputReg);
3524 SDValue Callee, CallingConv::ID CalleeCC,
bool IsVarArg,
3536 if (Callee->isDivergent())
3543 const uint32_t *CallerPreserved =
TRI->getCallPreservedMask(MF, CallerCC);
3547 if (!CallerPreserved)
3550 bool CCMatch = CallerCC == CalleeCC;
3563 if (Arg.hasByValAttr())
3577 const uint32_t *CalleePreserved =
TRI->getCallPreservedMask(MF, CalleeCC);
3578 if (!
TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
3587 CCState CCInfo(CalleeCC, IsVarArg, MF, ArgLocs, Ctx);
3615 CallingConv::ID CallConv = CLI.
CallConv;
3621 if (IsChainCallConv) {
3625 RequestedExec = CLI.
Args.back();
3626 assert(RequestedExec.
Node &&
"No node for EXEC");
3631 assert(CLI.
Outs.back().OrigArgIndex == 2 &&
"Unexpected last arg");
3632 CLI.
Outs.pop_back();
3636 assert(CLI.
Outs.back().OrigArgIndex == 2 &&
"Exec wasn't split up");
3637 CLI.
Outs.pop_back();
3642 "Haven't popped all the pieces of the EXEC mask");
3653 bool IsSibCall =
false;
3667 "unsupported call to variadic function ");
3675 "unsupported required tail call to function ");
3680 Callee, CallConv, IsVarArg, Outs, OutVals, Ins, DAG);
3684 "site marked musttail or on llvm.amdgcn.cs.chain");
3691 if (!TailCallOpt && IsTailCall)
3736 if (!IsSibCall || IsChainCallConv) {
3743 RegsToPass.emplace_back(IsChainCallConv
3744 ? AMDGPU::SGPR48_SGPR49_SGPR50_SGPR51
3745 : AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3,
3752 MVT PtrVT = MVT::i32;
3755 for (
unsigned i = 0, e = ArgLocs.
size(); i != e; ++i) {
3783 RegsToPass.push_back(std::pair(VA.
getLocReg(), Arg));
3791 int32_t
Offset = LocMemOffset;
3798 unsigned OpSize = Flags.isByVal() ?
3804 ? Flags.getNonZeroByValAlign()
3831 if (Outs[i].Flags.isByVal()) {
3833 DAG.
getConstant(Outs[i].Flags.getByValSize(),
DL, MVT::i32);
3836 Outs[i].Flags.getNonZeroByValAlign(),
3838 nullptr, std::nullopt, DstInfo,
3844 DAG.
getStore(Chain,
DL, Arg, DstAddr, DstInfo, Alignment);
3850 if (!MemOpChains.
empty())
3856 for (
auto &RegToPass : RegsToPass) {
3858 RegToPass.second, InGlue);
3867 if (IsTailCall && !IsSibCall) {
3872 std::vector<SDValue> Ops;
3873 Ops.push_back(Chain);
3874 Ops.push_back(Callee);
3891 if (IsChainCallConv)
3892 Ops.push_back(RequestedExec.
Node);
3896 for (
auto &RegToPass : RegsToPass) {
3898 RegToPass.second.getValueType()));
3903 const uint32_t *Mask =
TRI->getCallPreservedMask(MF, CallConv);
3904 assert(Mask &&
"Missing call preserved mask for calling convention");
3914 MVT::Glue, GlueOps),
3919 Ops.push_back(InGlue);
3938 return DAG.
getNode(OPC,
DL, NodeTys, Ops);
3943 Chain =
Call.getValue(0);
3944 InGlue =
Call.getValue(1);
3946 uint64_t CalleePopBytes = NumBytes;
3965 EVT VT =
Op.getValueType();
3971 Register SPReg = Info->getStackPtrOffsetReg();
3991 Tmp1 = DAG.
getNode(Opc, dl, VT, SP, ScaledSize);
3992 if (Alignment && *Alignment > StackAlign) {
4020 if (
Op.getValueType() != MVT::i32)
4039 assert(
Op.getValueType() == MVT::i32);
4048 Op.getOperand(0), IntrinID, GetRoundBothImm);
4082 SDValue RoundModeTimesNumBits =
4102 TableEntry, EnumOffset);
4118 static_cast<uint32_t>(ConstMode->getZExtValue()),
4130 if (UseReducedTable) {
4136 SDValue RoundModeTimesNumBits =
4156 SDValue RoundModeTimesNumBits =
4165 NewMode = TruncTable;
4174 ReadFirstLaneID, NewMode);
4187 IntrinID, RoundBothImm, NewMode);
4193 if (
Op->isDivergent())
4212 SDValue Src =
Op.getOperand(IsStrict ? 1 : 0);
4213 EVT SrcVT = Src.getValueType();
4222 EVT DstVT =
Op.getValueType();
4226 return DAG.
getNode(ISD::BF16_TO_FP, SL, DstVT, BitCast);
4231 if (
Op.getValueType() != MVT::i64)
4245 Op.getOperand(0), IntrinID, ModeHwRegImm);
4247 Op.getOperand(0), IntrinID, TrapHwRegImm);
4254 SDValue Result = DAG.
getNode(ISD::BITCAST, SL, MVT::i64, CvtPtr);
4261 if (
Op.getOperand(1).getValueType() != MVT::i64)
4264 SDValue Input = DAG.
getNode(ISD::BITCAST, SL, MVT::v2i32,
Op.getOperand(1));
4273 ReadFirstLaneID, NewModeReg);
4275 ReadFirstLaneID, NewTrapReg);
4277 unsigned ModeHwReg =
4280 unsigned TrapHwReg =
4288 IntrinID, ModeHwRegImm, NewModeReg);
4291 IntrinID, TrapHwRegImm, NewTrapReg);
4298 .
Case(
"m0", AMDGPU::M0)
4299 .
Case(
"exec", AMDGPU::EXEC)
4300 .
Case(
"exec_lo", AMDGPU::EXEC_LO)
4301 .
Case(
"exec_hi", AMDGPU::EXEC_HI)
4302 .
Case(
"flat_scratch", AMDGPU::FLAT_SCR)
4303 .
Case(
"flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
4304 .
Case(
"flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
4307 if (Reg == AMDGPU::NoRegister) {
4321 case AMDGPU::EXEC_LO:
4322 case AMDGPU::EXEC_HI:
4323 case AMDGPU::FLAT_SCR_LO:
4324 case AMDGPU::FLAT_SCR_HI:
4329 case AMDGPU::FLAT_SCR:
4348 MI.setDesc(
TII->getKillTerminatorFromPseudo(
MI.getOpcode()));
4357static std::pair<MachineBasicBlock *, MachineBasicBlock *>
4379 auto Next = std::next(
I);
4392 return std::pair(LoopBB, RemainderBB);
4399 auto I =
MI.getIterator();
4400 auto E = std::next(
I);
4422 Src->setIsKill(
false);
4438 Register Reg =
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
4441 BuildMI(*LoopBB,
I,
DL,
TII->get(AMDGPU::S_GETREG_B32), Reg)
4463 unsigned InitReg,
unsigned ResultReg,
unsigned PhiReg,
4464 unsigned InitSaveExecReg,
int Offset,
bool UseGPRIdxMode,
4473 Register PhiExec =
MRI.createVirtualRegister(BoolRC);
4474 Register NewExec =
MRI.createVirtualRegister(BoolRC);
4475 Register CurrentIdxReg =
MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
4476 Register CondReg =
MRI.createVirtualRegister(BoolRC);
4484 BuildMI(LoopBB,
I,
DL,
TII->get(TargetOpcode::PHI), PhiExec)
4491 BuildMI(LoopBB,
I,
DL,
TII->get(AMDGPU::V_READFIRSTLANE_B32), CurrentIdxReg)
4495 BuildMI(LoopBB,
I,
DL,
TII->get(AMDGPU::V_CMP_EQ_U32_e64), CondReg)
4500 BuildMI(LoopBB,
I,
DL,
TII->get(ST.isWave32() ? AMDGPU::S_AND_SAVEEXEC_B32
4501 : AMDGPU::S_AND_SAVEEXEC_B64),
4505 MRI.setSimpleHint(NewExec, CondReg);
4507 if (UseGPRIdxMode) {
4509 SGPRIdxReg = CurrentIdxReg;
4511 SGPRIdxReg =
MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
4512 BuildMI(LoopBB,
I,
DL,
TII->get(AMDGPU::S_ADD_I32), SGPRIdxReg)
4519 BuildMI(LoopBB,
I,
DL,
TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0)
4522 BuildMI(LoopBB,
I,
DL,
TII->get(AMDGPU::S_ADD_I32), AMDGPU::M0)
4529 unsigned Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
4531 BuildMI(LoopBB,
I,
DL,
TII->get(ST.isWave32() ? AMDGPU::S_XOR_B32_term
4532 : AMDGPU::S_XOR_B64_term), Exec)
4553 unsigned InitResultReg,
unsigned PhiReg,
int Offset,
4554 bool UseGPRIdxMode,
Register &SGPRIdxReg) {
4562 const auto *BoolXExecRC =
TRI->getRegClass(AMDGPU::SReg_1_XEXECRegClassID);
4564 Register SaveExec =
MRI.createVirtualRegister(BoolXExecRC);
4565 Register TmpExec =
MRI.createVirtualRegister(BoolXExecRC);
4566 unsigned Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
4567 unsigned MovExecOpc = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
4582 InitResultReg, DstReg, PhiReg, TmpExec,
4583 Offset, UseGPRIdxMode, SGPRIdxReg);
4600static std::pair<unsigned, int>
4605 int NumElts =
TRI.getRegSizeInBits(*SuperRC) / 32;
4610 return std::pair(AMDGPU::sub0,
Offset);
4647 Register Tmp =
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
4664 Register SrcReg =
TII->getNamedOperand(
MI, AMDGPU::OpName::src)->getReg();
4665 int Offset =
TII->getNamedOperand(
MI, AMDGPU::OpName::offset)->getImm();
4674 const bool UseGPRIdxMode = ST.useVGPRIndexMode();
4677 if (
TII->getRegisterInfo().isSGPRClass(IdxRC)) {
4681 if (UseGPRIdxMode) {
4688 TII->getIndirectGPRIDXPseudo(
TRI.getRegSizeInBits(*VecRC),
true);
4701 MI.eraseFromParent();
4710 Register PhiReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
4711 Register InitReg =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
4717 UseGPRIdxMode, SGPRIdxReg);
4721 if (UseGPRIdxMode) {
4723 TII->getIndirectGPRIDXPseudo(
TRI.getRegSizeInBits(*VecRC),
true);
4725 BuildMI(*LoopBB, InsPt,
DL, GPRIDXDesc, Dst)
4730 BuildMI(*LoopBB, InsPt,
DL,
TII->get(AMDGPU::V_MOVRELS_B32_e32), Dst)
4735 MI.eraseFromParent();
4752 int Offset =
TII->getNamedOperand(
MI, AMDGPU::OpName::offset)->getImm();
4763 const bool UseGPRIdxMode = ST.useVGPRIndexMode();
4765 if (Idx->
getReg() == AMDGPU::NoRegister) {
4776 MI.eraseFromParent();
4781 if (
TII->getRegisterInfo().isSGPRClass(IdxRC)) {
4785 if (UseGPRIdxMode) {
4789 TII->getIndirectGPRIDXPseudo(
TRI.getRegSizeInBits(*VecRC),
false);
4798 const MCInstrDesc &MovRelDesc =
TII->getIndirectRegWriteMovRelPseudo(
4799 TRI.getRegSizeInBits(*VecRC), 32,
false);
4805 MI.eraseFromParent();
4815 Register PhiReg =
MRI.createVirtualRegister(VecRC);
4819 UseGPRIdxMode, SGPRIdxReg);
4822 if (UseGPRIdxMode) {
4824 TII->getIndirectGPRIDXPseudo(
TRI.getRegSizeInBits(*VecRC),
false);
4826 BuildMI(*LoopBB, InsPt,
DL, GPRIDXDesc, Dst)
4832 const MCInstrDesc &MovRelDesc =
TII->getIndirectRegWriteMovRelPseudo(
4833 TRI.getRegSizeInBits(*VecRC), 32,
false);
4834 BuildMI(*LoopBB, InsPt,
DL, MovRelDesc, Dst)
4840 MI.eraseFromParent();
4855 bool isSGPR =
TRI->isSGPRClass(
MRI.getRegClass(SrcReg));
4883 Register LoopIterator =
MRI.createVirtualRegister(WaveMaskRegClass);
4884 Register InitalValReg =
MRI.createVirtualRegister(DstRegClass);
4886 Register AccumulatorReg =
MRI.createVirtualRegister(DstRegClass);
4887 Register ActiveBitsReg =
MRI.createVirtualRegister(WaveMaskRegClass);
4888 Register NewActiveBitsReg =
MRI.createVirtualRegister(WaveMaskRegClass);
4890 Register FF1Reg =
MRI.createVirtualRegister(DstRegClass);
4891 Register LaneValueReg =
MRI.createVirtualRegister(DstRegClass);
4893 bool IsWave32 = ST.isWave32();
4894 unsigned MovOpc = IsWave32 ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
4895 unsigned ExecReg = IsWave32 ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
4900 (Opc == AMDGPU::S_MIN_U32) ? std::numeric_limits<uint32_t>::max() : 0;
4903 BuildMI(BB,
I,
DL,
TII->get(AMDGPU::S_MOV_B32), InitalValReg)
4908 I = ComputeLoop->end();
4910 BuildMI(*ComputeLoop,
I,
DL,
TII->get(AMDGPU::PHI), AccumulatorReg)
4914 BuildMI(*ComputeLoop,
I,
DL,
TII->get(AMDGPU::PHI), ActiveBitsReg)
4915 .
addReg(TmpSReg->getOperand(0).getReg())
4919 unsigned SFFOpc = IsWave32 ? AMDGPU::S_FF1_I32_B32 : AMDGPU::S_FF1_I32_B64;
4920 auto FF1 =
BuildMI(*ComputeLoop,
I,
DL,
TII->get(SFFOpc), FF1Reg)
4921 .
addReg(ActiveBits->getOperand(0).getReg());
4922 auto LaneValue =
BuildMI(*ComputeLoop,
I,
DL,
4923 TII->get(AMDGPU::V_READLANE_B32), LaneValueReg)
4925 .
addReg(FF1->getOperand(0).getReg());
4926 auto NewAccumulator =
BuildMI(*ComputeLoop,
I,
DL,
TII->get(Opc), DstReg)
4928 .
addReg(LaneValue->getOperand(0).getReg());
4931 unsigned BITSETOpc =
4932 IsWave32 ? AMDGPU::S_BITSET0_B32 : AMDGPU::S_BITSET0_B64;
4933 auto NewActiveBits =
4934 BuildMI(*ComputeLoop,
I,
DL,
TII->get(BITSETOpc), NewActiveBitsReg)
4935 .
addReg(FF1->getOperand(0).getReg())
4936 .
addReg(ActiveBits->getOperand(0).getReg());
4941 ActiveBits.
addReg(NewActiveBits->getOperand(0).getReg())
4945 unsigned CMPOpc = IsWave32 ? AMDGPU::S_CMP_LG_U32 : AMDGPU::S_CMP_LG_U64;
4947 .
addReg(NewActiveBits->getOperand(0).getReg())
4949 BuildMI(*ComputeLoop,
I,
DL,
TII->get(AMDGPU::S_CBRANCH_SCC1))
4954 MI.eraseFromParent();
4965 switch (
MI.getOpcode()) {
4966 case AMDGPU::WAVE_REDUCE_UMIN_PSEUDO_U32:
4968 case AMDGPU::WAVE_REDUCE_UMAX_PSEUDO_U32:
4970 case AMDGPU::S_UADDO_PSEUDO:
4971 case AMDGPU::S_USUBO_PSEUDO: {
4978 unsigned Opc = (
MI.getOpcode() == AMDGPU::S_UADDO_PSEUDO)
4980 : AMDGPU::S_SUB_I32;
4987 MI.eraseFromParent();
4990 case AMDGPU::S_ADD_U64_PSEUDO:
4991 case AMDGPU::S_SUB_U64_PSEUDO: {
5000 bool IsAdd = (
MI.getOpcode() == AMDGPU::S_ADD_U64_PSEUDO);
5002 unsigned Opc = IsAdd ? AMDGPU::S_ADD_U64 : AMDGPU::S_SUB_U64;
5010 Register DestSub0 =
MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
5011 Register DestSub1 =
MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
5014 MI,
MRI, Src0, BoolRC, AMDGPU::sub0, &AMDGPU::SReg_32RegClass);
5016 MI,
MRI, Src0, BoolRC, AMDGPU::sub1, &AMDGPU::SReg_32RegClass);
5019 MI,
MRI, Src1, BoolRC, AMDGPU::sub0, &AMDGPU::SReg_32RegClass);
5021 MI,
MRI, Src1, BoolRC, AMDGPU::sub1, &AMDGPU::SReg_32RegClass);
5023 unsigned LoOpc = IsAdd ? AMDGPU::S_ADD_U32 : AMDGPU::S_SUB_U32;
5024 unsigned HiOpc = IsAdd ? AMDGPU::S_ADDC_U32 : AMDGPU::S_SUBB_U32;
5037 MI.eraseFromParent();
5040 case AMDGPU::V_ADD_U64_PSEUDO:
5041 case AMDGPU::V_SUB_U64_PSEUDO: {
5047 bool IsAdd = (
MI.getOpcode() == AMDGPU::V_ADD_U64_PSEUDO);
5053 if (IsAdd && ST.hasLshlAddB64()) {
5059 TII->legalizeOperands(*
Add);
5060 MI.eraseFromParent();
5064 const auto *CarryRC =
TRI->getRegClass(AMDGPU::SReg_1_XEXECRegClassID);
5066 Register DestSub0 =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
5067 Register DestSub1 =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
5069 Register CarryReg =
MRI.createVirtualRegister(CarryRC);
5070 Register DeadCarryReg =
MRI.createVirtualRegister(CarryRC);
5074 : &AMDGPU::VReg_64RegClass;
5077 : &AMDGPU::VReg_64RegClass;
5080 TRI->getSubRegisterClass(Src0RC, AMDGPU::sub0);
5082 TRI->getSubRegisterClass(Src1RC, AMDGPU::sub1);
5085 MI,
MRI, Src0, Src0RC, AMDGPU::sub0, Src0SubRC);
5087 MI,
MRI, Src1, Src1RC, AMDGPU::sub0, Src1SubRC);
5090 MI,
MRI, Src0, Src0RC, AMDGPU::sub1, Src0SubRC);
5092 MI,
MRI, Src1, Src1RC, AMDGPU::sub1, Src1SubRC);
5094 unsigned LoOpc = IsAdd ? AMDGPU::V_ADD_CO_U32_e64 : AMDGPU::V_SUB_CO_U32_e64;
5101 unsigned HiOpc = IsAdd ? AMDGPU::V_ADDC_U32_e64 : AMDGPU::V_SUBB_U32_e64;
5115 TII->legalizeOperands(*LoHalf);
5116 TII->legalizeOperands(*HiHalf);
5117 MI.eraseFromParent();
5120 case AMDGPU::S_ADD_CO_PSEUDO:
5121 case AMDGPU::S_SUB_CO_PSEUDO: {
5135 unsigned Opc = (
MI.getOpcode() == AMDGPU::S_ADD_CO_PSEUDO)
5136 ? AMDGPU::S_ADDC_U32
5137 : AMDGPU::S_SUBB_U32;
5139 Register RegOp0 =
MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
5140 BuildMI(*BB, MII,
DL,
TII->get(AMDGPU::V_READFIRSTLANE_B32), RegOp0)
5145 Register RegOp1 =
MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
5146 BuildMI(*BB, MII,
DL,
TII->get(AMDGPU::V_READFIRSTLANE_B32), RegOp1)
5150 Register RegOp2 =
MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
5152 BuildMI(*BB, MII,
DL,
TII->get(AMDGPU::V_READFIRSTLANE_B32), RegOp2)
5158 unsigned WaveSize =
TRI->getRegSizeInBits(*Src2RC);
5159 assert(WaveSize == 64 || WaveSize == 32);
5161 if (WaveSize == 64) {
5162 if (ST.hasScalarCompareEq64()) {
5168 TRI->getSubRegisterClass(Src2RC, AMDGPU::sub0);
5170 MII,
MRI, Src2, Src2RC, AMDGPU::sub0, SubRC);
5172 MII,
MRI, Src2, Src2RC, AMDGPU::sub1, SubRC);
5173 Register Src2_32 =
MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
5175 BuildMI(*BB, MII,
DL,
TII->get(AMDGPU::S_OR_B32), Src2_32)
5192 (WaveSize == 64) ? AMDGPU::S_CSELECT_B64 : AMDGPU::S_CSELECT_B32;
5198 MI.eraseFromParent();
5201 case AMDGPU::SI_INIT_M0: {
5203 TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0)
5204 .
add(
MI.getOperand(0));
5205 MI.eraseFromParent();
5208 case AMDGPU::GET_GROUPSTATICSIZE: {
5213 .
add(
MI.getOperand(0))
5215 MI.eraseFromParent();
5218 case AMDGPU::GET_SHADERCYCLESHILO: {
5232 using namespace AMDGPU::Hwreg;
5233 Register RegHi1 =
MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
5235 .
addImm(HwregEncoding::encode(ID_SHADER_CYCLES_HI, 0, 32));
5236 Register RegLo1 =
MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
5238 .
addImm(HwregEncoding::encode(ID_SHADER_CYCLES, 0, 32));
5239 Register RegHi2 =
MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
5241 .
addImm(HwregEncoding::encode(ID_SHADER_CYCLES_HI, 0, 32));
5245 Register RegLo =
MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
5250 .
add(
MI.getOperand(0))
5255 MI.eraseFromParent();
5258 case AMDGPU::SI_INDIRECT_SRC_V1:
5259 case AMDGPU::SI_INDIRECT_SRC_V2:
5260 case AMDGPU::SI_INDIRECT_SRC_V4:
5261 case AMDGPU::SI_INDIRECT_SRC_V8:
5262 case AMDGPU::SI_INDIRECT_SRC_V9:
5263 case AMDGPU::SI_INDIRECT_SRC_V10:
5264 case AMDGPU::SI_INDIRECT_SRC_V11:
5265 case AMDGPU::SI_INDIRECT_SRC_V12:
5266 case AMDGPU::SI_INDIRECT_SRC_V16:
5267 case AMDGPU::SI_INDIRECT_SRC_V32:
5269 case AMDGPU::SI_INDIRECT_DST_V1:
5270 case AMDGPU::SI_INDIRECT_DST_V2:
5271 case AMDGPU::SI_INDIRECT_DST_V4:
5272 case AMDGPU::SI_INDIRECT_DST_V8:
5273 case AMDGPU::SI_INDIRECT_DST_V9:
5274 case AMDGPU::SI_INDIRECT_DST_V10:
5275 case AMDGPU::SI_INDIRECT_DST_V11:
5276 case AMDGPU::SI_INDIRECT_DST_V12:
5277 case AMDGPU::SI_INDIRECT_DST_V16:
5278 case AMDGPU::SI_INDIRECT_DST_V32:
5280 case AMDGPU::SI_KILL_F32_COND_IMM_PSEUDO:
5281 case AMDGPU::SI_KILL_I1_PSEUDO:
5283 case AMDGPU::V_CNDMASK_B64_PSEUDO: {
5292 Register SrcCond =
MI.getOperand(3).getReg();
5294 Register DstLo =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
5295 Register DstHi =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
5296 const auto *CondRC =
TRI->getRegClass(AMDGPU::SReg_1_XEXECRegClassID);
5297 Register SrcCondCopy =
MRI.createVirtualRegister(CondRC);
5301 : &AMDGPU::VReg_64RegClass;
5304 : &AMDGPU::VReg_64RegClass;
5307 TRI->getSubRegisterClass(Src0RC, AMDGPU::sub0);
5309 TRI->getSubRegisterClass(Src1RC, AMDGPU::sub1);
5312 MI,
MRI, Src0, Src0RC, AMDGPU::sub0, Src0SubRC);
5314 MI,
MRI, Src1, Src1RC, AMDGPU::sub0, Src1SubRC);
5317 MI,
MRI, Src0, Src0RC, AMDGPU::sub1, Src0SubRC);
5319 MI,
MRI, Src1, Src1RC, AMDGPU::sub1, Src1SubRC);
5341 MI.eraseFromParent();
5344 case AMDGPU::SI_BR_UNDEF: {
5348 .
add(
MI.getOperand(0));
5350 MI.eraseFromParent();
5353 case AMDGPU::ADJCALLSTACKUP:
5354 case AMDGPU::ADJCALLSTACKDOWN: {
5361 case AMDGPU::SI_CALL_ISEL: {
5365 unsigned ReturnAddrReg =
TII->getRegisterInfo().getReturnAddressReg(*MF);
5368 MIB =
BuildMI(*BB,
MI,
DL,
TII->get(AMDGPU::SI_CALL), ReturnAddrReg);
5374 MI.eraseFromParent();
5377 case AMDGPU::V_ADD_CO_U32_e32:
5378 case AMDGPU::V_SUB_CO_U32_e32:
5379 case AMDGPU::V_SUBREV_CO_U32_e32: {
5382 unsigned Opc =
MI.getOpcode();
5384 bool NeedClampOperand =
false;
5385 if (
TII->pseudoToMCOpcode(Opc) == -1) {
5387 NeedClampOperand =
true;
5391 if (
TII->isVOP3(*
I)) {
5396 I.add(
MI.getOperand(1))
5397 .add(
MI.getOperand(2));
5398 if (NeedClampOperand)
5401 TII->legalizeOperands(*
I);
5403 MI.eraseFromParent();
5406 case AMDGPU::V_ADDC_U32_e32:
5407 case AMDGPU::V_SUBB_U32_e32:
5408 case AMDGPU::V_SUBBREV_U32_e32:
5411 TII->legalizeOperands(
MI);
5413 case AMDGPU::DS_GWS_INIT:
5414 case AMDGPU::DS_GWS_SEMA_BR:
5415 case AMDGPU::DS_GWS_BARRIER:
5416 TII->enforceOperandRCAlignment(
MI, AMDGPU::OpName::data0);
5418 case AMDGPU::DS_GWS_SEMA_V:
5419 case AMDGPU::DS_GWS_SEMA_P:
5420 case AMDGPU::DS_GWS_SEMA_RELEASE_ALL:
5428 case AMDGPU::S_SETREG_B32: {
5438 auto [ID,
Offset, Width] =
5444 const unsigned SetMask = WidthMask <<
Offset;
5447 unsigned SetDenormOp = 0;
5448 unsigned SetRoundOp = 0;
5456 SetRoundOp = AMDGPU::S_ROUND_MODE;
5457 SetDenormOp = AMDGPU::S_DENORM_MODE;
5459 SetRoundOp = AMDGPU::S_ROUND_MODE;
5461 SetDenormOp = AMDGPU::S_DENORM_MODE;
5464 if (SetRoundOp || SetDenormOp) {
5467 if (Def && Def->isMoveImmediate() && Def->getOperand(1).isImm()) {
5468 unsigned ImmVal = Def->getOperand(1).getImm();
5482 MI.eraseFromParent();
5491 MI.setDesc(
TII->get(AMDGPU::S_SETREG_B32_mode));
5495 case AMDGPU::S_INVERSE_BALLOT_U32:
5496 case AMDGPU::S_INVERSE_BALLOT_U64:
5499 MI.setDesc(
TII->get(AMDGPU::COPY));
5501 case AMDGPU::ENDPGM_TRAP: {
5504 MI.setDesc(
TII->get(AMDGPU::S_ENDPGM));
5522 MI.eraseFromParent();
5525 case AMDGPU::SIMULATED_TRAP: {
5529 TII->insertSimulatedTrap(
MRI, *BB,
MI,
MI.getDebugLoc());
5530 MI.eraseFromParent();
5567 return (VT == MVT::i16) ? MVT::i16 : MVT::i32;
5571 return (Ty.getScalarSizeInBits() <= 16 && Subtarget->
has16BitInsts())
5623 switch (Ty.getScalarSizeInBits()) {
5641 if (Ty.getScalarSizeInBits() == 16)
5643 if (Ty.getScalarSizeInBits() == 32)
5654 EVT VT =
N->getValueType(0);
5658 if (VT == MVT::f16) {
5674 unsigned Opc =
Op.getOpcode();
5675 EVT VT =
Op.getValueType();
5676 assert(VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4f32 ||
5677 VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v16i16 ||
5678 VT == MVT::v16f16 || VT == MVT::v8f32 || VT == MVT::v16f32 ||
5679 VT == MVT::v32f32 || VT == MVT::v32i16 || VT == MVT::v32f16);
5697 unsigned Opc =
Op.getOpcode();
5698 EVT VT =
Op.getValueType();
5699 assert(VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4f32 ||
5700 VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v16i16 ||
5701 VT == MVT::v16f16 || VT == MVT::v8f32 || VT == MVT::v16f32 ||
5702 VT == MVT::v32f32 || VT == MVT::v32i16 || VT == MVT::v32f16);
5721 unsigned Opc =
Op.getOpcode();
5722 EVT VT =
Op.getValueType();
5723 assert(VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v8i16 ||
5724 VT == MVT::v8f16 || VT == MVT::v4f32 || VT == MVT::v16i16 ||
5725 VT == MVT::v16f16 || VT == MVT::v8f32 || VT == MVT::v16f32 ||
5726 VT == MVT::v32f32 || VT == MVT::v32f16 || VT == MVT::v32i16 ||
5727 VT == MVT::v4bf16 || VT == MVT::v8bf16 || VT == MVT::v16bf16 ||
5728 VT == MVT::v32bf16);
5734 : std::pair(Op0, Op0);
5753 switch (
Op.getOpcode()) {
5755 case ISD::BRCOND:
return LowerBRCOND(
Op, DAG);
5759 assert((!Result.getNode() ||
5760 Result.getNode()->getNumValues() == 2) &&
5761 "Load should return a value and a chain");
5765 EVT VT =
Op.getValueType();
5767 return lowerFSQRTF32(
Op, DAG);
5769 return lowerFSQRTF64(
Op, DAG);
5774 return LowerTrig(
Op, DAG);
5777 case ISD::FFREXP:
return LowerFFREXP(
Op, DAG);
5778 case ISD::ATOMIC_CMP_SWAP:
return LowerATOMIC_CMP_SWAP(
Op, DAG);
5779 case ISD::STORE:
return LowerSTORE(
Op, DAG);
5783 return LowerGlobalAddress(MFI,
Op, DAG);
5788 case ISD::ADDRSPACECAST:
return lowerADDRSPACECAST(
Op, DAG);
5790 return lowerINSERT_SUBVECTOR(
Op, DAG);
5792 return lowerINSERT_VECTOR_ELT(
Op, DAG);
5794 return lowerEXTRACT_VECTOR_ELT(
Op, DAG);
5796 return lowerVECTOR_SHUFFLE(
Op, DAG);
5798 return lowerSCALAR_TO_VECTOR(
Op, DAG);
5800 return lowerBUILD_VECTOR(
Op, DAG);
5803 return lowerFP_ROUND(
Op, DAG);
5808 if (
Op.getOperand(0)->getValueType(0) != MVT::f32)
5812 int RoundMode =
Op.getConstantOperandVal(1);
5820 return DAG.
getNode(Opc,
DL,
Op.getNode()->getVTList(),
Op->getOperand(0));
5823 return lowerTRAP(
Op, DAG);
5824 case ISD::DEBUGTRAP:
5825 return lowerDEBUGTRAP(
Op, DAG);
5834 return lowerFMINNUM_FMAXNUM(
Op, DAG);
5837 return lowerFLDEXP(
Op, DAG);
5854 case ISD::FMINNUM_IEEE:
5855 case ISD::FMAXNUM_IEEE:
5864 return lowerMUL(
Op, DAG);
5867 return lowerXMULO(
Op, DAG);
5870 return lowerXMUL_LOHI(
Op, DAG);
5871 case ISD::DYNAMIC_STACKALLOC:
5873 case ISD::STACKSAVE:
5877 case ISD::SET_ROUNDING:
5881 case ISD::FP_EXTEND:
5884 case ISD::GET_FPENV:
5886 case ISD::SET_FPENV:
5903 EVT FittingLoadVT = LoadVT;
5928 return DAG.
getNode(ISD::BITCAST,
DL, FittingLoadVT, Result);
5932 return DAG.
getNode(ISD::BITCAST,
DL, FittingLoadVT, Result);
5935SDValue SITargetLowering::adjustLoadValueType(
unsigned Opcode,
5939 bool IsIntrinsic)
const {
5943 EVT LoadVT = M->getValueType(0);
5945 EVT EquivLoadVT = LoadVT;
5964 VTList, Ops, M->getMemoryVT(),
5965 M->getMemOperand());
5976 EVT LoadVT = M->getValueType(0);
5982 assert(M->getNumValues() == 2 || M->getNumValues() == 3);
5983 bool IsTFE = M->getNumValues() == 3;
5996 return handleByteShortBufferLoads(DAG, LoadVT,
DL, Ops, M->getMemOperand(),
6000 return getMemIntrinsicNode(Opc,
DL, M->getVTList(), Ops, IntVT,
6001 M->getMemOperand(), DAG);
6006 SDValue MemNode = getMemIntrinsicNode(Opc,
DL, VTList, Ops, CastVT,
6007 M->getMemOperand(), DAG);
6015 EVT VT =
N->getValueType(0);
6016 unsigned CondCode =
N->getConstantOperandVal(3);
6027 EVT CmpVT = LHS.getValueType();
6028 if (CmpVT == MVT::i16 && !TLI.
isTypeLegal(MVT::i16)) {
6031 LHS = DAG.
getNode(PromoteOp,
DL, MVT::i32, LHS);
6032 RHS = DAG.
getNode(PromoteOp,
DL, MVT::i32, RHS);
6049 EVT VT =
N->getValueType(0);
6051 unsigned CondCode =
N->getConstantOperandVal(3);
6060 if (CmpVT == MVT::f16 && !TLI.
isTypeLegal(CmpVT)) {
6061 Src0 = DAG.
getNode(ISD::FP_EXTEND, SL, MVT::f32, Src0);
6062 Src1 = DAG.
getNode(ISD::FP_EXTEND, SL, MVT::f32, Src1);
6078 EVT VT =
N->getValueType(0);
6085 Src.getOperand(1), Src.getOperand(2));
6096 Exec = AMDGPU::EXEC_LO;
6098 Exec = AMDGPU::EXEC;
6115 EVT VT =
N->getValueType(0);
6117 unsigned IID =
N->getConstantOperandVal(0);
6118 bool IsPermLane16 = IID == Intrinsic::amdgcn_permlane16 ||
6119 IID == Intrinsic::amdgcn_permlanex16;
6127 case Intrinsic::amdgcn_permlane16:
6128 case Intrinsic::amdgcn_permlanex16:
6133 case Intrinsic::amdgcn_writelane:
6136 case Intrinsic::amdgcn_readlane:
6139 case Intrinsic::amdgcn_readfirstlane:
6140 case Intrinsic::amdgcn_permlane64:
6152 GL = GL->getOperand(0).getNode();
6162 if (IID == Intrinsic::amdgcn_readlane || IID == Intrinsic::amdgcn_writelane ||
6164 Src1 =
N->getOperand(2);
6165 if (IID == Intrinsic::amdgcn_writelane || IsPermLane16)
6166 Src2 =
N->getOperand(3);
6169 if (ValSize == 32) {
6184 if (IID == Intrinsic::amdgcn_writelane) {
6189 SDValue LaneOp = createLaneOp(Src0, Src1, Src2, MVT::i32);
6191 return IsFloat ? DAG.
getBitcast(VT, Trunc) : Trunc;
6194 if (ValSize % 32 != 0)
6198 EVT VT =
N->getValueType(0);
6202 unsigned NumOperands =
N->getNumOperands();
6209 for (
unsigned i = 0; i != NE; ++i) {
6210 for (
unsigned j = 0, e = GL ? NumOperands - 1 : NumOperands; j != e;
6243 return unrollLaneOp(LaneOp.
getNode());
6250 SDValue Src0SubVec, Src1SubVec, Src2SubVec;
6251 for (
unsigned i = 0, EltIdx = 0; i < ValSize / 32; i++) {
6259 if (IID == Intrinsic::amdgcn_writelane)
6265 ? createLaneOp(Src0SubVec, Src1SubVec, Src2, SubVecVT)
6266 : createLaneOp(Src0SubVec, Src1, Src2SubVec, SubVecVT));
6283 if (IID == Intrinsic::amdgcn_writelane)
6286 SDValue LaneOp = createLaneOp(Src0, Src1, Src2, VecVT);
6294 switch (
N->getOpcode()) {
6306 unsigned IID =
N->getConstantOperandVal(0);
6308 case Intrinsic::amdgcn_make_buffer_rsrc:
6309 Results.push_back(lowerPointerAsRsrcIntrin(
N, DAG));
6311 case Intrinsic::amdgcn_cvt_pkrtz: {
6317 Results.push_back(DAG.
getNode(ISD::BITCAST, SL, MVT::v2f16, Cvt));
6320 case Intrinsic::amdgcn_cvt_pknorm_i16:
6321 case Intrinsic::amdgcn_cvt_pknorm_u16:
6322 case Intrinsic::amdgcn_cvt_pk_i16:
6323 case Intrinsic::amdgcn_cvt_pk_u16: {
6329 if (IID == Intrinsic::amdgcn_cvt_pknorm_i16)
6331 else if (IID == Intrinsic::amdgcn_cvt_pknorm_u16)
6333 else if (IID == Intrinsic::amdgcn_cvt_pk_i16)
6338 EVT VT =
N->getValueType(0);
6343 Results.push_back(DAG.
getNode(ISD::BITCAST, SL, MVT::v2i16, Cvt));
6347 case Intrinsic::amdgcn_s_buffer_load: {
6359 EVT VT =
Op.getValueType();
6360 assert(VT == MVT::i8 &&
"Expected 8-bit s_buffer_load intrinsics.\n");
6372 if (!
Offset->isDivergent()) {
6391 LoadVal = handleByteShortBufferLoads(DAG, VT,
DL, Ops, MMO);
6403 for (
unsigned I = 0;
I < Res.getNumOperands();
I++) {
6404 Results.push_back(Res.getOperand(
I));
6408 Results.push_back(Res.getValue(1));
6417 EVT VT =
N->getValueType(0);
6422 EVT SelectVT = NewVT;
6423 if (NewVT.
bitsLT(MVT::i32)) {
6426 SelectVT = MVT::i32;
6430 N->getOperand(0), LHS, RHS);
6432 if (NewVT != SelectVT)
6438 if (
N->getValueType(0) != MVT::v2f16)
6442 SDValue BC = DAG.
getNode(ISD::BITCAST, SL, MVT::i32,
N->getOperand(0));
6451 if (
N->getValueType(0) != MVT::v2f16)
6455 SDValue BC = DAG.
getNode(ISD::BITCAST, SL, MVT::i32,
N->getOperand(0));
6464 if (
N->getValueType(0) != MVT::f16)
6482 if (
I.getUse().get() !=
Value)
6485 if (
I->getOpcode() == Opcode)
6491unsigned SITargetLowering::isCFIntrinsic(
const SDNode *
Intr)
const {
6493 switch (
Intr->getConstantOperandVal(1)) {
6494 case Intrinsic::amdgcn_if:
6496 case Intrinsic::amdgcn_else:
6498 case Intrinsic::amdgcn_loop:
6500 case Intrinsic::amdgcn_end_cf:
6548 SDNode *
Intr = BRCOND.getOperand(1).getNode();
6561 assert(BR &&
"brcond missing unconditional branch user");
6565 unsigned CFNode = isCFIntrinsic(
Intr);
6584 Ops.
append(
Intr->op_begin() + (HaveChain ? 2 : 1),
Intr->op_end());
6614 for (
unsigned i = 1, e =
Intr->getNumValues() - 1; i != e; ++i) {
6631 Intr->getOperand(0));
6638 MVT VT =
Op.getSimpleValueType();
6641 if (
Op.getConstantOperandVal(0) != 0)
6647 if (Info->isEntryFunction())
6665 return Op.getValueType().bitsLE(VT) ?
6672 assert(
Op.getValueType() == MVT::f16 &&
6673 "Do not know how to custom lower FP_ROUND for non-f16 type");
6676 EVT SrcVT = Src.getValueType();
6677 if (SrcVT != MVT::f64)
6688 return DAG.
getNode(ISD::BITCAST,
DL, MVT::f16, Trunc);
6693 EVT VT =
Op.getValueType();
6705 if (VT == MVT::v4f16 || VT == MVT::v8f16 || VT == MVT::v16f16 ||
6713 EVT VT =
Op.getValueType();
6717 EVT ExpVT =
Exp.getValueType();
6718 if (ExpVT == MVT::i16)
6739 {
Op.getOperand(0),
Op.getOperand(1), TruncExp});
6742 return DAG.
getNode(ISD::FLDEXP,
DL, VT,
Op.getOperand(0), TruncExp);
6747 EVT VT =
Op.getValueType();
6753 assert(VT == MVT::i64 &&
"The following code is a special for s_mul_u64");
6780 if (
Op->isDivergent())
6793 if (Op0LeadingZeros >= 32 && Op1LeadingZeros >= 32)
6795 DAG.
getMachineNode(AMDGPU::S_MUL_U64_U32_PSEUDO, SL, VT, Op0, Op1), 0);
6798 if (Op0SignBits >= 33 && Op1SignBits >= 33)
6800 DAG.
getMachineNode(AMDGPU::S_MUL_I64_I32_PSEUDO, SL, VT, Op0, Op1), 0);
6806 EVT VT =
Op.getValueType();
6813 const APInt &
C = RHSC->getAPIntValue();
6815 if (
C.isPowerOf2()) {
6817 bool UseArithShift =
isSigned && !
C.isMinSignedValue();
6822 SL, VT, Result, ShiftAmt),
6842 if (
Op->isDivergent()) {
6859 return lowerTrapEndpgm(
Op, DAG);
6862 lowerTrapHsaQueuePtr(
Op, DAG);
6865SDValue SITargetLowering::lowerTrapEndpgm(
6873 const SDLoc &
DL,
Align Alignment, ImplicitParameter Param)
const {
6883SDValue SITargetLowering::lowerTrapHsaQueuePtr(
6893 loadImplicitKernelArgument(DAG, MVT::i64, SL,
Align(8),
QUEUE_PTR);
6897 Register UserSGPR = Info->getQueuePtrUserSGPR();
6899 if (UserSGPR == AMDGPU::NoRegister) {
6924SDValue SITargetLowering::lowerTrapHsa(
6950 "debugtrap handler not supported",
6966SDValue SITargetLowering::getSegmentAperture(
unsigned AS,
const SDLoc &
DL,
6970 ? AMDGPU::SRC_SHARED_BASE
6971 : AMDGPU::SRC_PRIVATE_BASE;
6994 {SDValue(Mov, 0), DAG.getConstant(32, DL, MVT::i64)}));
7003 return loadImplicitKernelArgument(DAG, MVT::i32,
DL,
Align(4), Param);
7008 Register UserSGPR = Info->getQueuePtrUserSGPR();
7009 if (UserSGPR == AMDGPU::NoRegister) {
7016 DAG, &AMDGPU::SReg_64RegClass, UserSGPR, MVT::i64);
7044 return ConstVal->getSExtValue() !=
TM.getNullPointerValue(AddrSpace);
7058 unsigned DestAS, SrcAS;
7060 bool IsNonNull =
false;
7062 SrcAS = ASC->getSrcAddressSpace();
7063 Src = ASC->getOperand(0);
7064 DestAS = ASC->getDestAddressSpace();
7067 Op.getConstantOperandVal(0) ==
7068 Intrinsic::amdgcn_addrspacecast_nonnull);
7069 Src =
Op->getOperand(1);
7070 SrcAS =
Op->getConstantOperandVal(2);
7071 DestAS =
Op->getConstantOperandVal(3);
7086 unsigned NullVal =
TM.getNullPointerValue(DestAS);
7100 SDValue Aperture = getSegmentAperture(SrcAS, SL, DAG);
7103 CvtPtr = DAG.
getNode(ISD::BITCAST, SL, MVT::i64, CvtPtr);
7108 unsigned NullVal =
TM.getNullPointerValue(SrcAS);
7120 Op.getValueType() == MVT::i64) {
7125 return DAG.
getNode(ISD::BITCAST, SL, MVT::i64, Vec);
7129 Src.getValueType() == MVT::i64)
7153 EVT InsVT =
Ins.getValueType();
7161 assert(InsNumElts % 2 == 0 &&
"expect legal vector types");
7166 EVT NewInsVT = InsNumElts == 2 ? MVT::i32
7168 MVT::i32, InsNumElts / 2);
7170 Vec = DAG.
getNode(ISD::BITCAST, SL, NewVecVT, Vec);
7171 Ins = DAG.
getNode(ISD::BITCAST, SL, NewInsVT, Ins);
7173 for (
unsigned I = 0;
I != InsNumElts / 2; ++
I) {
7175 if (InsNumElts == 2) {
7185 return DAG.
getNode(ISD::BITCAST, SL, VecVT, Vec);
7188 for (
unsigned I = 0;
I != InsNumElts; ++
I) {
7211 if (NumElts == 4 && EltSize == 16 && KIdx) {
7219 SDValue LoVec = DAG.
getNode(ISD::BITCAST, SL, MVT::v2i16, LoHalf);
7220 SDValue HiVec = DAG.
getNode(ISD::BITCAST, SL, MVT::v2i16, HiHalf);
7222 unsigned Idx = KIdx->getZExtValue();
7223 bool InsertLo = Idx < 2;
7225 InsertLo ? LoVec : HiVec,
7226 DAG.
getNode(ISD::BITCAST, SL, MVT::i16, InsVal),
7227 DAG.
getConstant(InsertLo ? Idx : (Idx - 2), SL, MVT::i32));
7229 InsHalf = DAG.
getNode(ISD::BITCAST, SL, MVT::i32, InsHalf);
7233 DAG.getBuildVector(
MVT::v2i32, SL, { LoHalf, InsHalf });
7246 assert(VecSize <= 64 &&
"Expected target vector size to be <= 64 bits");
7268 DAG.
getNOT(SL, BFM, IntVT), BCVec);
7273 return DAG.
getNode(ISD::BITCAST, SL, VecVT, BFI);
7280 EVT ResultVT =
Op.getValueType();
7293 if (
SDValue Combined = performExtractVectorEltCombine(
Op.getNode(), DCI))
7296 if (VecSize == 128 || VecSize == 256 || VecSize == 512) {
7301 if (VecSize == 128) {
7309 }
else if (VecSize == 256) {
7312 for (
unsigned P = 0;
P < 4; ++
P) {
7318 Parts[0], Parts[1]));
7320 Parts[2], Parts[3]));
7326 for (
unsigned P = 0;
P < 8; ++
P) {
7333 Parts[0], Parts[1], Parts[2], Parts[3]));
7336 Parts[4], Parts[5],Parts[6], Parts[7]));
7356 Src = DAG.
getBitcast(Src.getValueType().changeTypeToInteger(), Src);
7371 if (ResultVT == MVT::f16 || ResultVT == MVT::bf16) {
7373 return DAG.
getNode(ISD::BITCAST, SL, ResultVT, Result);
7381 return Mask[Elt + 1] == Mask[Elt] + 1 && (Mask[Elt] % 2 == 0);
7387 EVT ResultVT =
Op.getValueType();
7390 EVT PackVT = ResultVT.
isInteger() ? MVT::v2i16 : MVT::v2f16;
7392 int SrcNumElts =
Op.getOperand(0).getValueType().getVectorNumElements();
7408 int VecIdx = Idx < SrcNumElts ? 0 : 1;
7409 int EltIdx = Idx < SrcNumElts ? Idx : Idx - SrcNumElts;
7417 int VecIdx0 = Idx0 < SrcNumElts ? 0 : 1;
7418 int VecIdx1 = Idx1 < SrcNumElts ? 0 : 1;
7419 int EltIdx0 = Idx0 < SrcNumElts ? Idx0 : Idx0 - SrcNumElts;
7420 int EltIdx1 = Idx1 < SrcNumElts ? Idx1 : Idx1 - SrcNumElts;
7439 EVT ResultVT =
Op.getValueType();
7455 EVT VT =
Op.getValueType();
7457 if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v8i16 ||
7458 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7477 { CastLo, CastHi });
7478 return DAG.
getNode(ISD::BITCAST, SL, VT, Blend);
7481 if (VT == MVT::v16i16 || VT == MVT::v16f16 || VT == MVT::v16bf16) {
7488 for (
unsigned P = 0;
P < 4; ++
P)
7489 Parts[
P].push_back(
Op.getOperand(
I +
P * E));
7492 for (
unsigned P = 0;
P < 4; ++
P) {
7494 Casts[
P] = DAG.
getNode(ISD::BITCAST, SL, QuarterIntVT, Vec);
7499 return DAG.
getNode(ISD::BITCAST, SL, VT, Blend);
7502 if (VT == MVT::v32i16 || VT == MVT::v32f16 || VT == MVT::v32bf16) {
7509 for (
unsigned P = 0;
P < 8; ++
P)
7510 Parts[
P].push_back(
Op.getOperand(
I +
P * E));
7513 for (
unsigned P = 0;
P < 8; ++
P) {
7515 Casts[
P] = DAG.
getNode(ISD::BITCAST, SL, QuarterIntVT, Vec);
7520 return DAG.
getNode(ISD::BITCAST, SL, VT, Blend);
7523 assert(VT == MVT::v2f16 || VT == MVT::v2i16 || VT == MVT::v2bf16);
7533 return DAG.
getNode(ISD::BITCAST, SL, VT, ExtLo);
7542 return DAG.
getNode(ISD::BITCAST, SL, VT, ShlHi);
7548 return DAG.
getNode(ISD::BITCAST, SL, VT,
Or);
7613 EVT PtrVT =
Op.getValueType();
7629 assert(PtrVT == MVT::i32 &&
"32-bit pointer is expected.");
7702 SDValue Param = lowerKernargMemParameter(
7712 "non-hsa intrinsic with hsa target",
7721 "intrinsic not supported on subtarget",
7731 unsigned NumElts = Elts.
size();
7733 if (NumElts <= 12) {
7742 for (
unsigned i = 0; i < Elts.
size(); ++i) {
7748 for (
unsigned i = Elts.
size(); i < NumElts; ++i)
7749 VecElts[i] = DAG.
getUNDEF(MVT::f32);
7758 EVT SrcVT = Src.getValueType();
7779 bool Unpacked,
bool IsD16,
int DMaskPop,
7780 int NumVDataDwords,
bool IsAtomicPacked16Bit,
7783 EVT ReqRetVT = ResultTypes[0];
7785 int NumDataDwords = ((IsD16 && !Unpacked) || IsAtomicPacked16Bit)
7786 ? (ReqRetNumElts + 1) / 2
7789 int MaskPopDwords = (!IsD16 || Unpacked) ? DMaskPop : (DMaskPop + 1) / 2;
7791 MVT DataDwordVT = NumDataDwords == 1 ?
7794 MVT MaskPopVT = MaskPopDwords == 1 ?
7800 if (DMaskPop > 0 &&
Data.getValueType() != MaskPopVT) {
7811 if (DataDwordVT.
isVector() && !IsAtomicPacked16Bit)
7813 NumDataDwords - MaskPopDwords);
7818 EVT LegalReqRetVT = ReqRetVT;
7820 if (!
Data.getValueType().isInteger())
7822 Data.getValueType().changeTypeToInteger(),
Data);
7843 if (Result->getNumValues() == 1)
7850 SDValue *LWE,
bool &IsTexFail) {
7870 unsigned DimIdx,
unsigned EndIdx,
7871 unsigned NumGradients) {
7873 for (
unsigned I = DimIdx;
I < EndIdx;
I++) {
7881 if (((
I + 1) >= EndIdx) ||
7882 ((NumGradients / 2) % 2 == 1 && (
I == DimIdx + (NumGradients / 2) - 1 ||
7883 I == DimIdx + NumGradients - 1))) {
7884 if (
Addr.getValueType() != MVT::i16)
7905 unsigned IntrOpcode =
Intr->BaseOpcode;
7916 int NumVDataDwords = 0;
7917 bool AdjustRetType =
false;
7918 bool IsAtomicPacked16Bit =
false;
7921 const unsigned ArgOffset = WithChain ? 2 : 1;
7924 unsigned DMaskLanes = 0;
7926 if (BaseOpcode->Atomic) {
7927 VData =
Op.getOperand(2);
7929 IsAtomicPacked16Bit =
7930 (
Intr->BaseOpcode == AMDGPU::IMAGE_ATOMIC_PK_ADD_F16 ||
7931 Intr->BaseOpcode == AMDGPU::IMAGE_ATOMIC_PK_ADD_BF16);
7934 if (BaseOpcode->AtomicX2) {
7941 ResultTypes[0] = Is64Bit ? MVT::v2i64 : MVT::v2i32;
7942 DMask = Is64Bit ? 0xf : 0x3;
7943 NumVDataDwords = Is64Bit ? 4 : 2;
7945 DMask = Is64Bit ? 0x3 : 0x1;
7946 NumVDataDwords = Is64Bit ? 2 : 1;
7949 DMask =
Op->getConstantOperandVal(ArgOffset +
Intr->DMaskIndex);
7952 if (BaseOpcode->Store) {
7953 VData =
Op.getOperand(2);
7961 VData = handleD16VData(VData, DAG,
true);
7964 NumVDataDwords = (VData.
getValueType().getSizeInBits() + 31) / 32;
7965 }
else if (!BaseOpcode->NoReturn) {
7978 (!LoadVT.
isVector() && DMaskLanes > 1))
7986 NumVDataDwords = (DMaskLanes + 1) / 2;
7988 NumVDataDwords = DMaskLanes;
7990 AdjustRetType =
true;
7994 unsigned VAddrEnd = ArgOffset +
Intr->VAddrEnd;
7999 Op.getOperand(ArgOffset +
Intr->GradientStart).getSimpleValueType();
8001 MVT GradPackVectorVT = VAddrScalarVT == MVT::f16 ? MVT::v2f16 : MVT::v2i16;
8002 IsG16 = VAddrScalarVT == MVT::f16 || VAddrScalarVT == MVT::i16;
8004 VAddrVT =
Op.getOperand(ArgOffset +
Intr->CoordStart).getSimpleValueType();
8006 MVT AddrPackVectorVT = VAddrScalarVT == MVT::f16 ? MVT::v2f16 : MVT::v2i16;
8007 IsA16 = VAddrScalarVT == MVT::f16 || VAddrScalarVT == MVT::i16;
8011 if (IsA16 && (
Op.getOperand(ArgOffset +
I).getValueType() == MVT::f16)) {
8012 assert(
I ==
Intr->BiasIndex &&
"Got unexpected 16-bit extra argument");
8017 {
Op.getOperand(ArgOffset +
I), DAG.
getUNDEF(MVT::f16)});
8021 "Bias needs to be converted to 16 bit in A16 mode");
8026 if (BaseOpcode->Gradients && !
ST->hasG16() && (IsA16 != IsG16)) {
8030 dbgs() <<
"Failed to lower image intrinsic: 16 bit addresses "
8031 "require 16 bit args for both gradients and addresses");
8036 if (!
ST->hasA16()) {
8037 LLVM_DEBUG(
dbgs() <<
"Failed to lower image intrinsic: Target does not "
8038 "support 16 bit addresses\n");
8048 if (BaseOpcode->Gradients && IsG16 &&
ST->hasG16()) {
8052 IntrOpcode = G16MappingInfo->
G16;
8060 ArgOffset +
Intr->GradientStart,
8061 ArgOffset +
Intr->CoordStart,
Intr->NumGradients);
8063 for (
unsigned I = ArgOffset +
Intr->GradientStart;
8071 ArgOffset +
Intr->CoordStart, VAddrEnd,
8075 for (
unsigned I = ArgOffset +
Intr->CoordStart;
I < VAddrEnd;
I++)
8093 const unsigned NSAMaxSize =
ST->getNSAMaxSize(BaseOpcode->Sampler);
8094 const bool HasPartialNSAEncoding =
ST->hasPartialNSAEncoding();
8095 const bool UseNSA =
ST->hasNSAEncoding() &&
8096 VAddrs.
size() >=
ST->getNSAThreshold(MF) &&
8097 (VAddrs.
size() <= NSAMaxSize || HasPartialNSAEncoding);
8098 const bool UsePartialNSA =
8099 UseNSA && HasPartialNSAEncoding && VAddrs.
size() > NSAMaxSize;
8102 if (UsePartialNSA) {
8104 ArrayRef(VAddrs).drop_front(NSAMaxSize - 1));
8113 if (!BaseOpcode->Sampler) {
8117 Op.getConstantOperandVal(ArgOffset +
Intr->UnormIndex);
8119 Unorm = UnormConst ? True : False;
8124 SDValue TexFail =
Op.getOperand(ArgOffset +
Intr->TexFailCtrlIndex);
8125 bool IsTexFail =
false;
8126 if (!
parseTexFail(TexFail, DAG, &TFE, &LWE, IsTexFail))
8137 NumVDataDwords += 1;
8138 AdjustRetType =
true;
8143 if (AdjustRetType) {
8145 if (DMaskLanes == 0 && !BaseOpcode->Store) {
8153 EVT NewVT = NumVDataDwords > 1 ?
8157 ResultTypes[0] = NewVT;
8158 if (ResultTypes.size() == 3) {
8162 ResultTypes.erase(&ResultTypes[1]);
8166 unsigned CPol =
Op.getConstantOperandVal(ArgOffset +
Intr->CachePolicyIndex);
8167 if (BaseOpcode->Atomic)
8174 if (BaseOpcode->Store || BaseOpcode->Atomic)
8176 if (UsePartialNSA) {
8185 if (BaseOpcode->Sampler)
8190 if (!IsGFX12Plus || BaseOpcode->Sampler || BaseOpcode->MSAA)
8194 ST->hasFeature(AMDGPU::FeatureR128A16) ? True : False);
8202 if (!IsGFX12Plus || BaseOpcode->Sampler || BaseOpcode->MSAA)
8206 if (BaseOpcode->HasD16)
8211 int NumVAddrDwords =
8217 NumVDataDwords, NumVAddrDwords);
8218 }
else if (IsGFX11Plus) {
8220 UseNSA ? AMDGPU::MIMGEncGfx11NSA
8221 : AMDGPU::MIMGEncGfx11Default,
8222 NumVDataDwords, NumVAddrDwords);
8223 }
else if (IsGFX10Plus) {
8225 UseNSA ? AMDGPU::MIMGEncGfx10NSA
8226 : AMDGPU::MIMGEncGfx10Default,
8227 NumVDataDwords, NumVAddrDwords);
8231 NumVDataDwords, NumVAddrDwords);
8234 "requested image instruction is not supported on this GPU");
8239 NumVDataDwords, NumVAddrDwords);
8242 NumVDataDwords, NumVAddrDwords);
8253 if (BaseOpcode->AtomicX2) {
8258 if (BaseOpcode->NoReturn)
8262 NumVDataDwords, IsAtomicPacked16Bit,
DL);
8280 if (!
Offset->isDivergent()) {
8325 return handleByteShortBufferLoads(DAG, VT,
DL, Ops, MMO);
8329 unsigned NumLoads = 1;
8335 if (NumElts == 8 || NumElts == 16) {
8336 NumLoads = NumElts / 4;
8344 setBufferOffsets(
Offset, DAG, &Ops[3],
8345 NumLoads > 1 ?
Align(16 * NumLoads) :
Align(4));
8348 for (
unsigned i = 0; i < NumLoads; ++i) {
8354 if (NumElts == 8 || NumElts == 16)
8401 EVT VT =
Op.getValueType();
8403 unsigned IntrinsicID =
Op.getConstantOperandVal(0);
8407 switch (IntrinsicID) {
8408 case Intrinsic::amdgcn_implicit_buffer_ptr: {
8411 return getPreloadedValue(DAG, *MFI, VT,
8414 case Intrinsic::amdgcn_dispatch_ptr:
8415 case Intrinsic::amdgcn_queue_ptr: {
8418 MF.
getFunction(),
"unsupported hsa intrinsic without hsa target",
8424 auto RegID = IntrinsicID == Intrinsic::amdgcn_dispatch_ptr ?
8426 return getPreloadedValue(DAG, *MFI, VT, RegID);
8428 case Intrinsic::amdgcn_implicitarg_ptr: {
8430 return getImplicitArgPtr(DAG,
DL);
8431 return getPreloadedValue(DAG, *MFI, VT,
8434 case Intrinsic::amdgcn_kernarg_segment_ptr: {
8440 return getPreloadedValue(DAG, *MFI, VT,
8443 case Intrinsic::amdgcn_dispatch_id: {
8446 case Intrinsic::amdgcn_rcp:
8448 case Intrinsic::amdgcn_rsq:
8450 case Intrinsic::amdgcn_rsq_legacy:
8454 case Intrinsic::amdgcn_rcp_legacy:
8458 case Intrinsic::amdgcn_rsq_clamp: {
8469 return DAG.
getNode(ISD::FMAXNUM,
DL, VT, Tmp,
8472 case Intrinsic::r600_read_ngroups_x:
8476 return lowerKernargMemParameter(DAG, VT, VT,
DL, DAG.
getEntryNode(),
8479 case Intrinsic::r600_read_ngroups_y:
8483 return lowerKernargMemParameter(DAG, VT, VT,
DL, DAG.
getEntryNode(),
8486 case Intrinsic::r600_read_ngroups_z:
8490 return lowerKernargMemParameter(DAG, VT, VT,
DL, DAG.
getEntryNode(),
8493 case Intrinsic::r600_read_global_size_x:
8497 return lowerKernargMemParameter(DAG, VT, VT,
DL, DAG.
getEntryNode(),
8500 case Intrinsic::r600_read_global_size_y:
8504 return lowerKernargMemParameter(DAG, VT, VT,
DL, DAG.
getEntryNode(),
8507 case Intrinsic::r600_read_global_size_z:
8511 return lowerKernargMemParameter(DAG, VT, VT,
DL, DAG.
getEntryNode(),
8514 case Intrinsic::r600_read_local_size_x:
8518 return lowerImplicitZextParam(DAG,
Op, MVT::i16,
8520 case Intrinsic::r600_read_local_size_y:
8524 return lowerImplicitZextParam(DAG,
Op, MVT::i16,
8526 case Intrinsic::r600_read_local_size_z:
8530 return lowerImplicitZextParam(DAG,
Op, MVT::i16,
8532 case Intrinsic::amdgcn_workgroup_id_x:
8533 return getPreloadedValue(DAG, *MFI, VT,
8535 case Intrinsic::amdgcn_workgroup_id_y:
8536 return getPreloadedValue(DAG, *MFI, VT,
8538 case Intrinsic::amdgcn_workgroup_id_z:
8539 return getPreloadedValue(DAG, *MFI, VT,
8541 case Intrinsic::amdgcn_wave_id:
8542 return lowerWaveID(DAG,
Op);
8543 case Intrinsic::amdgcn_lds_kernel_id: {
8545 return getLDSKernelId(DAG,
DL);
8546 return getPreloadedValue(DAG, *MFI, VT,
8549 case Intrinsic::amdgcn_workitem_id_x:
8550 return lowerWorkitemID(DAG,
Op, 0, MFI->getArgInfo().WorkItemIDX);
8551 case Intrinsic::amdgcn_workitem_id_y:
8552 return lowerWorkitemID(DAG,
Op, 1, MFI->getArgInfo().WorkItemIDY);
8553 case Intrinsic::amdgcn_workitem_id_z:
8554 return lowerWorkitemID(DAG,
Op, 2, MFI->getArgInfo().WorkItemIDZ);
8555 case Intrinsic::amdgcn_wavefrontsize:
8558 case Intrinsic::amdgcn_s_buffer_load: {
8559 unsigned CPol =
Op.getConstantOperandVal(3);
8566 return lowerSBuffer(VT,
DL,
Op.getOperand(1),
Op.getOperand(2),
Op.getOperand(3),
8569 case Intrinsic::amdgcn_fdiv_fast:
8570 return lowerFDIV_FAST(
Op, DAG);
8571 case Intrinsic::amdgcn_sin:
8574 case Intrinsic::amdgcn_cos:
8577 case Intrinsic::amdgcn_mul_u24:
8579 case Intrinsic::amdgcn_mul_i24:
8582 case Intrinsic::amdgcn_log_clamp: {
8588 case Intrinsic::amdgcn_fract:
8591 case Intrinsic::amdgcn_class:
8593 Op.getOperand(1),
Op.getOperand(2));
8594 case Intrinsic::amdgcn_div_fmas:
8596 Op.getOperand(1),
Op.getOperand(2),
Op.getOperand(3),
8599 case Intrinsic::amdgcn_div_fixup:
8601 Op.getOperand(1),
Op.getOperand(2),
Op.getOperand(3));
8603 case Intrinsic::amdgcn_div_scale: {
8616 SDValue Src0 = Param->isAllOnes() ? Numerator : Denominator;
8619 Denominator, Numerator);
8621 case Intrinsic::amdgcn_icmp: {
8623 if (
Op.getOperand(1).getValueType() == MVT::i1 &&
8624 Op.getConstantOperandVal(2) == 0 &&
8629 case Intrinsic::amdgcn_fcmp: {
8632 case Intrinsic::amdgcn_ballot:
8634 case Intrinsic::amdgcn_fmed3:
8636 Op.getOperand(1),
Op.getOperand(2),
Op.getOperand(3));
8637 case Intrinsic::amdgcn_fdot2:
8639 Op.getOperand(1),
Op.getOperand(2),
Op.getOperand(3),
8641 case Intrinsic::amdgcn_fmul_legacy:
8643 Op.getOperand(1),
Op.getOperand(2));
8644 case Intrinsic::amdgcn_sffbh:
8646 case Intrinsic::amdgcn_sbfe:
8648 Op.getOperand(1),
Op.getOperand(2),
Op.getOperand(3));
8649 case Intrinsic::amdgcn_ubfe:
8651 Op.getOperand(1),
Op.getOperand(2),
Op.getOperand(3));
8652 case Intrinsic::amdgcn_cvt_pkrtz:
8653 case Intrinsic::amdgcn_cvt_pknorm_i16:
8654 case Intrinsic::amdgcn_cvt_pknorm_u16:
8655 case Intrinsic::amdgcn_cvt_pk_i16:
8656 case Intrinsic::amdgcn_cvt_pk_u16: {
8658 EVT VT =
Op.getValueType();
8661 if (IntrinsicID == Intrinsic::amdgcn_cvt_pkrtz)
8663 else if (IntrinsicID == Intrinsic::amdgcn_cvt_pknorm_i16)
8665 else if (IntrinsicID == Intrinsic::amdgcn_cvt_pknorm_u16)
8667 else if (IntrinsicID == Intrinsic::amdgcn_cvt_pk_i16)
8673 return DAG.
getNode(Opcode,
DL, VT,
Op.getOperand(1),
Op.getOperand(2));
8676 Op.getOperand(1),
Op.getOperand(2));
8679 case Intrinsic::amdgcn_fmad_ftz:
8681 Op.getOperand(2),
Op.getOperand(3));
8683 case Intrinsic::amdgcn_if_break:
8685 Op->getOperand(1),
Op->getOperand(2)), 0);
8687 case Intrinsic::amdgcn_groupstaticsize: {
8699 case Intrinsic::amdgcn_is_shared:
8700 case Intrinsic::amdgcn_is_private: {
8702 unsigned AS = (IntrinsicID == Intrinsic::amdgcn_is_shared) ?
8704 SDValue Aperture = getSegmentAperture(AS, SL, DAG);
8712 case Intrinsic::amdgcn_perm:
8714 Op.getOperand(2),
Op.getOperand(3));
8715 case Intrinsic::amdgcn_reloc_constant: {
8725 case Intrinsic::amdgcn_swmmac_f16_16x16x32_f16:
8726 case Intrinsic::amdgcn_swmmac_bf16_16x16x32_bf16:
8727 case Intrinsic::amdgcn_swmmac_f32_16x16x32_bf16:
8728 case Intrinsic::amdgcn_swmmac_f32_16x16x32_f16:
8729 case Intrinsic::amdgcn_swmmac_f32_16x16x32_fp8_fp8:
8730 case Intrinsic::amdgcn_swmmac_f32_16x16x32_fp8_bf8:
8731 case Intrinsic::amdgcn_swmmac_f32_16x16x32_bf8_fp8:
8732 case Intrinsic::amdgcn_swmmac_f32_16x16x32_bf8_bf8: {
8733 if (
Op.getOperand(4).getValueType() == MVT::i32)
8739 Op.getOperand(0),
Op.getOperand(1),
Op.getOperand(2),
8740 Op.getOperand(3), IndexKeyi32);
8742 case Intrinsic::amdgcn_swmmac_i32_16x16x32_iu4:
8743 case Intrinsic::amdgcn_swmmac_i32_16x16x32_iu8:
8744 case Intrinsic::amdgcn_swmmac_i32_16x16x64_iu4: {
8745 if (
Op.getOperand(6).getValueType() == MVT::i32)
8751 {Op.getOperand(0), Op.getOperand(1), Op.getOperand(2),
8752 Op.getOperand(3), Op.getOperand(4), Op.getOperand(5),
8753 IndexKeyi32, Op.getOperand(7)});
8755 case Intrinsic::amdgcn_addrspacecast_nonnull:
8756 return lowerADDRSPACECAST(
Op, DAG);
8757 case Intrinsic::amdgcn_readlane:
8758 case Intrinsic::amdgcn_readfirstlane:
8759 case Intrinsic::amdgcn_writelane:
8760 case Intrinsic::amdgcn_permlane16:
8761 case Intrinsic::amdgcn_permlanex16:
8762 case Intrinsic::amdgcn_permlane64:
8767 return lowerImage(
Op, ImageDimIntr, DAG,
false);
8778 return DAG.
getRegister(AMDGPU::SGPR_NULL, MVT::i32);
8784 unsigned NewOpcode)
const {
8788 SDValue Rsrc = bufferRsrcPtrToVector(
Op.getOperand(3), DAG);
8789 auto Offsets = splitBufferOffsets(
Op.getOperand(4), DAG);
8807 M->getMemOperand());
8812 unsigned NewOpcode)
const {
8816 SDValue Rsrc = bufferRsrcPtrToVector(
Op.getOperand(3), DAG);
8817 auto Offsets = splitBufferOffsets(
Op.getOperand(5), DAG);
8835 M->getMemOperand());
8840 unsigned IntrID =
Op.getConstantOperandVal(1);
8844 case Intrinsic::amdgcn_ds_ordered_add:
8845 case Intrinsic::amdgcn_ds_ordered_swap: {
8850 unsigned IndexOperand = M->getConstantOperandVal(7);
8851 unsigned WaveRelease = M->getConstantOperandVal(8);
8852 unsigned WaveDone = M->getConstantOperandVal(9);
8854 unsigned OrderedCountIndex = IndexOperand & 0x3f;
8855 IndexOperand &= ~0x3f;
8856 unsigned CountDw = 0;
8859 CountDw = (IndexOperand >> 24) & 0xf;
8860 IndexOperand &= ~(0xf << 24);
8862 if (CountDw < 1 || CountDw > 4) {
8864 "ds_ordered_count: dword count must be between 1 and 4");
8871 if (WaveDone && !WaveRelease)
8874 unsigned Instruction = IntrID == Intrinsic::amdgcn_ds_ordered_add ? 0 : 1;
8875 unsigned ShaderType =
8877 unsigned Offset0 = OrderedCountIndex << 2;
8878 unsigned Offset1 = WaveRelease | (WaveDone << 1) | (
Instruction << 4);
8881 Offset1 |= (CountDw - 1) << 6;
8884 Offset1 |= ShaderType << 2;
8886 unsigned Offset = Offset0 | (Offset1 << 8);
8895 M->getVTList(), Ops, M->getMemoryVT(),
8896 M->getMemOperand());
8898 case Intrinsic::amdgcn_raw_buffer_load:
8899 case Intrinsic::amdgcn_raw_ptr_buffer_load:
8900 case Intrinsic::amdgcn_raw_atomic_buffer_load:
8901 case Intrinsic::amdgcn_raw_ptr_atomic_buffer_load:
8902 case Intrinsic::amdgcn_raw_buffer_load_format:
8903 case Intrinsic::amdgcn_raw_ptr_buffer_load_format: {
8904 const bool IsFormat =
8905 IntrID == Intrinsic::amdgcn_raw_buffer_load_format ||
8906 IntrID == Intrinsic::amdgcn_raw_ptr_buffer_load_format;
8908 SDValue Rsrc = bufferRsrcPtrToVector(
Op.getOperand(2), DAG);
8909 auto Offsets = splitBufferOffsets(
Op.getOperand(3), DAG);
8923 return lowerIntrinsicLoad(M, IsFormat, DAG, Ops);
8925 case Intrinsic::amdgcn_struct_buffer_load:
8926 case Intrinsic::amdgcn_struct_ptr_buffer_load:
8927 case Intrinsic::amdgcn_struct_buffer_load_format:
8928 case Intrinsic::amdgcn_struct_ptr_buffer_load_format: {
8929 const bool IsFormat =
8930 IntrID == Intrinsic::amdgcn_struct_buffer_load_format ||
8931 IntrID == Intrinsic::amdgcn_struct_ptr_buffer_load_format;
8933 SDValue Rsrc = bufferRsrcPtrToVector(
Op.getOperand(2), DAG);
8934 auto Offsets = splitBufferOffsets(
Op.getOperand(4), DAG);
8949 case Intrinsic::amdgcn_raw_tbuffer_load:
8950 case Intrinsic::amdgcn_raw_ptr_tbuffer_load: {
8952 EVT LoadVT =
Op.getValueType();
8953 SDValue Rsrc = bufferRsrcPtrToVector(
Op.getOperand(2), DAG);
8954 auto Offsets = splitBufferOffsets(
Op.getOperand(3), DAG);
8973 Op->getVTList(), Ops, LoadVT, M->getMemOperand(),
8976 case Intrinsic::amdgcn_struct_tbuffer_load:
8977 case Intrinsic::amdgcn_struct_ptr_tbuffer_load: {
8979 EVT LoadVT =
Op.getValueType();
8980 SDValue Rsrc = bufferRsrcPtrToVector(
Op.getOperand(2), DAG);
8981 auto Offsets = splitBufferOffsets(
Op.getOperand(4), DAG);
9000 Op->getVTList(), Ops, LoadVT, M->getMemOperand(),
9003 case Intrinsic::amdgcn_raw_buffer_atomic_fadd:
9004 case Intrinsic::amdgcn_raw_ptr_buffer_atomic_fadd:
9006 case Intrinsic::amdgcn_struct_buffer_atomic_fadd:
9007 case Intrinsic::amdgcn_struct_ptr_buffer_atomic_fadd:
9009 case Intrinsic::amdgcn_raw_buffer_atomic_fmin:
9010 case Intrinsic::amdgcn_raw_ptr_buffer_atomic_fmin:
9012 case Intrinsic::amdgcn_struct_buffer_atomic_fmin:
9013 case Intrinsic::amdgcn_struct_ptr_buffer_atomic_fmin:
9015 case Intrinsic::amdgcn_raw_buffer_atomic_fmax:
9016 case Intrinsic::amdgcn_raw_ptr_buffer_atomic_fmax:
9018 case Intrinsic::amdgcn_struct_buffer_atomic_fmax:
9019 case Intrinsic::amdgcn_struct_ptr_buffer_atomic_fmax:
9021 case Intrinsic::amdgcn_raw_buffer_atomic_swap:
9022 case Intrinsic::amdgcn_raw_ptr_buffer_atomic_swap:
9024 case Intrinsic::amdgcn_raw_buffer_atomic_add:
9025 case Intrinsic::amdgcn_raw_ptr_buffer_atomic_add:
9027 case Intrinsic::amdgcn_raw_buffer_atomic_sub:
9028 case Intrinsic::amdgcn_raw_ptr_buffer_atomic_sub:
9030 case Intrinsic::amdgcn_raw_buffer_atomic_smin:
9031 case Intrinsic::amdgcn_raw_ptr_buffer_atomic_smin:
9033 case Intrinsic::amdgcn_raw_buffer_atomic_umin:
9034 case Intrinsic::amdgcn_raw_ptr_buffer_atomic_umin:
9036 case Intrinsic::amdgcn_raw_buffer_atomic_smax:
9037 case Intrinsic::amdgcn_raw_ptr_buffer_atomic_smax:
9039 case Intrinsic::amdgcn_raw_buffer_atomic_umax:
9040 case Intrinsic::amdgcn_raw_ptr_buffer_atomic_umax:
9042 case Intrinsic::amdgcn_raw_buffer_atomic_and:
9043 case Intrinsic::amdgcn_raw_ptr_buffer_atomic_and:
9045 case Intrinsic::amdgcn_raw_buffer_atomic_or:
9046 case Intrinsic::amdgcn_raw_ptr_buffer_atomic_or:
9048 case Intrinsic::amdgcn_raw_buffer_atomic_xor:
9049 case Intrinsic::amdgcn_raw_ptr_buffer_atomic_xor:
9051 case Intrinsic::amdgcn_raw_buffer_atomic_inc:
9052 case Intrinsic::amdgcn_raw_ptr_buffer_atomic_inc:
9054 case Intrinsic::amdgcn_raw_buffer_atomic_dec:
9055 case Intrinsic::amdgcn_raw_ptr_buffer_atomic_dec:
9057 case Intrinsic::amdgcn_raw_buffer_atomic_cond_sub_u32:
9058 return lowerRawBufferAtomicIntrin(
Op, DAG,
9060 case Intrinsic::amdgcn_struct_buffer_atomic_swap:
9061 case Intrinsic::amdgcn_struct_ptr_buffer_atomic_swap:
9062 return lowerStructBufferAtomicIntrin(
Op, DAG,
9064 case Intrinsic::amdgcn_struct_buffer_atomic_add:
9065 case Intrinsic::amdgcn_struct_ptr_buffer_atomic_add:
9067 case Intrinsic::amdgcn_struct_buffer_atomic_sub:
9068 case Intrinsic::amdgcn_struct_ptr_buffer_atomic_sub:
9070 case Intrinsic::amdgcn_struct_buffer_atomic_smin:
9071 case Intrinsic::amdgcn_struct_ptr_buffer_atomic_smin:
9072 return lowerStructBufferAtomicIntrin(
Op, DAG,
9074 case Intrinsic::amdgcn_struct_buffer_atomic_umin:
9075 case Intrinsic::amdgcn_struct_ptr_buffer_atomic_umin:
9076 return lowerStructBufferAtomicIntrin(
Op, DAG,
9078 case Intrinsic::amdgcn_struct_buffer_atomic_smax:
9079 case Intrinsic::amdgcn_struct_ptr_buffer_atomic_smax:
9080 return lowerStructBufferAtomicIntrin(
Op, DAG,
9082 case Intrinsic::amdgcn_struct_buffer_atomic_umax:
9083 case Intrinsic::amdgcn_struct_ptr_buffer_atomic_umax:
9084 return lowerStructBufferAtomicIntrin(
Op, DAG,
9086 case Intrinsic::amdgcn_struct_buffer_atomic_and:
9087 case Intrinsic::amdgcn_struct_ptr_buffer_atomic_and:
9089 case Intrinsic::amdgcn_struct_buffer_atomic_or:
9090 case Intrinsic::amdgcn_struct_ptr_buffer_atomic_or:
9092 case Intrinsic::amdgcn_struct_buffer_atomic_xor:
9093 case Intrinsic::amdgcn_struct_ptr_buffer_atomic_xor:
9095 case Intrinsic::amdgcn_struct_buffer_atomic_inc:
9096 case Intrinsic::amdgcn_struct_ptr_buffer_atomic_inc:
9098 case Intrinsic::amdgcn_struct_buffer_atomic_dec:
9099 case Intrinsic::amdgcn_struct_ptr_buffer_atomic_dec:
9101 case Intrinsic::amdgcn_struct_buffer_atomic_cond_sub_u32:
9102 return lowerStructBufferAtomicIntrin(
Op, DAG,
9105 case Intrinsic::amdgcn_raw_buffer_atomic_cmpswap:
9106 case Intrinsic::amdgcn_raw_ptr_buffer_atomic_cmpswap: {
9107 SDValue Rsrc = bufferRsrcPtrToVector(
Op.getOperand(4), DAG);
9108 auto Offsets = splitBufferOffsets(
Op.getOperand(5), DAG);
9122 EVT VT =
Op.getValueType();
9126 Op->getVTList(), Ops, VT, M->getMemOperand());
9128 case Intrinsic::amdgcn_struct_buffer_atomic_cmpswap:
9129 case Intrinsic::amdgcn_struct_ptr_buffer_atomic_cmpswap: {
9130 SDValue Rsrc = bufferRsrcPtrToVector(
Op->getOperand(4), DAG);
9131 auto Offsets = splitBufferOffsets(
Op.getOperand(6), DAG);
9145 EVT VT =
Op.getValueType();
9149 Op->getVTList(), Ops, VT, M->getMemOperand());
9151 case Intrinsic::amdgcn_image_bvh_intersect_ray: {
9160 assert(NodePtr.getValueType() == MVT::i32 ||
9161 NodePtr.getValueType() == MVT::i64);
9174 const bool Is64 = NodePtr.getValueType() == MVT::i64;
9175 const unsigned NumVDataDwords = 4;
9176 const unsigned NumVAddrDwords = IsA16 ? (Is64 ? 9 : 8) : (Is64 ? 12 : 11);
9177 const unsigned NumVAddrs = IsGFX11Plus ? (IsA16 ? 4 : 5) : NumVAddrDwords;
9181 const unsigned BaseOpcodes[2][2] = {
9182 {AMDGPU::IMAGE_BVH_INTERSECT_RAY, AMDGPU::IMAGE_BVH_INTERSECT_RAY_a16},
9183 {AMDGPU::IMAGE_BVH64_INTERSECT_RAY,
9184 AMDGPU::IMAGE_BVH64_INTERSECT_RAY_a16}};
9188 IsGFX12Plus ? AMDGPU::MIMGEncGfx12
9189 : IsGFX11 ? AMDGPU::MIMGEncGfx11NSA
9190 : AMDGPU::MIMGEncGfx10NSA,
9191 NumVDataDwords, NumVAddrDwords);
9195 IsGFX11 ? AMDGPU::MIMGEncGfx11Default
9196 : AMDGPU::MIMGEncGfx10Default,
9197 NumVDataDwords, NumVAddrDwords);
9203 auto packLanes = [&DAG, &Ops, &
DL] (
SDValue Op,
bool IsAligned) {
9206 if (Lanes[0].getValueSizeInBits() == 32) {
9207 for (
unsigned I = 0;
I < 3; ++
I)
9214 { Lanes[0], Lanes[1] })));
9221 { Elt0, Lanes[0] })));
9225 { Lanes[1], Lanes[2] })));
9230 if (UseNSA && IsGFX11Plus) {
9238 for (
unsigned I = 0;
I < 3; ++
I) {
9241 {DirLanes[I], InvDirLanes[I]})));
9256 packLanes(RayOrigin,
true);
9257 packLanes(RayDir,
true);
9258 packLanes(RayInvDir,
false);
9263 if (NumVAddrDwords > 12) {
9283 case Intrinsic::amdgcn_global_atomic_fmin:
9284 case Intrinsic::amdgcn_global_atomic_fmax:
9285 case Intrinsic::amdgcn_global_atomic_fmin_num:
9286 case Intrinsic::amdgcn_global_atomic_fmax_num:
9287 case Intrinsic::amdgcn_flat_atomic_fmin:
9288 case Intrinsic::amdgcn_flat_atomic_fmax:
9289 case Intrinsic::amdgcn_flat_atomic_fmin_num:
9290 case Intrinsic::amdgcn_flat_atomic_fmax_num: {
9297 unsigned Opcode = 0;
9299 case Intrinsic::amdgcn_global_atomic_fmin:
9300 case Intrinsic::amdgcn_global_atomic_fmin_num:
9301 case Intrinsic::amdgcn_flat_atomic_fmin:
9302 case Intrinsic::amdgcn_flat_atomic_fmin_num: {
9303 Opcode = ISD::ATOMIC_LOAD_FMIN;
9306 case Intrinsic::amdgcn_global_atomic_fmax:
9307 case Intrinsic::amdgcn_global_atomic_fmax_num:
9308 case Intrinsic::amdgcn_flat_atomic_fmax:
9309 case Intrinsic::amdgcn_flat_atomic_fmax_num: {
9310 Opcode = ISD::ATOMIC_LOAD_FMAX;
9317 Ops, M->getMemOperand());
9319 case Intrinsic::amdgcn_s_get_barrier_state: {
9323 bool IsInlinableBarID =
false;
9331 if (IsInlinableBarID) {
9332 Opc = AMDGPU::S_GET_BARRIER_STATE_IMM;
9336 Opc = AMDGPU::S_GET_BARRIER_STATE_M0;
9348 return lowerImage(
Op, ImageDimIntr, DAG,
true);
9356SDValue SITargetLowering::getMemIntrinsicNode(
unsigned Opcode,
const SDLoc &
DL,
9366 bool IsTFE = VTList.
NumVTs == 3;
9369 unsigned NumOpDWords = NumValueDWords + 1;
9374 SDValue Op = getMemIntrinsicNode(Opcode,
DL, OpDWordsVTList, Ops,
9375 OpDWordsVT, OpDWordsMMO, DAG);
9390 (VT == MVT::v3i32 || VT == MVT::v3f32)) {
9396 WidenedMemVT, WidenedMMO);
9406 bool ImageStore)
const {
9441 for (
unsigned I = 0;
I < Elts.
size() / 2;
I += 1) {
9447 if ((NumElements % 2) == 1) {
9449 unsigned I = Elts.
size() / 2;
9465 if (NumElements == 3) {
9475 return DAG.
getNode(ISD::BITCAST,
DL, WidenedStoreVT, ZExt);
9486 unsigned IntrinsicID =
Op.getConstantOperandVal(1);
9489 switch (IntrinsicID) {
9490 case Intrinsic::amdgcn_exp_compr: {
9494 "intrinsic not supported on subtarget",
DL.getDebugLoc());
9507 DAG.
getNode(ISD::BITCAST,
DL, MVT::f32, Src0),
9508 DAG.
getNode(ISD::BITCAST,
DL, MVT::f32, Src1),
9517 unsigned Opc =
Done->isZero() ? AMDGPU::EXP : AMDGPU::EXP_DONE;
9520 case Intrinsic::amdgcn_s_barrier: {
9523 unsigned WGSize =
ST.getFlatWorkGroupSizes(MF.
getFunction()).second;
9524 if (WGSize <=
ST.getWavefrontSize())
9526 Op.getOperand(0)), 0);
9530 if (
ST.hasSplitBarriers()) {
9535 MVT::Other, K,
Op.getOperand(0)),
9547 case Intrinsic::amdgcn_struct_tbuffer_store:
9548 case Intrinsic::amdgcn_struct_ptr_tbuffer_store: {
9550 bool IsD16 = (VData.
getValueType().getScalarType() == MVT::f16);
9552 VData = handleD16VData(VData, DAG);
9553 SDValue Rsrc = bufferRsrcPtrToVector(
Op.getOperand(3), DAG);
9554 auto Offsets = splitBufferOffsets(
Op.getOperand(5), DAG);
9572 M->getMemoryVT(), M->getMemOperand());
9575 case Intrinsic::amdgcn_raw_tbuffer_store:
9576 case Intrinsic::amdgcn_raw_ptr_tbuffer_store: {
9578 bool IsD16 = (VData.
getValueType().getScalarType() == MVT::f16);
9580 VData = handleD16VData(VData, DAG);
9581 SDValue Rsrc = bufferRsrcPtrToVector(
Op.getOperand(3), DAG);
9582 auto Offsets = splitBufferOffsets(
Op.getOperand(4), DAG);
9600 M->getMemoryVT(), M->getMemOperand());
9603 case Intrinsic::amdgcn_raw_buffer_store:
9604 case Intrinsic::amdgcn_raw_ptr_buffer_store:
9605 case Intrinsic::amdgcn_raw_buffer_store_format:
9606 case Intrinsic::amdgcn_raw_ptr_buffer_store_format: {
9607 const bool IsFormat =
9608 IntrinsicID == Intrinsic::amdgcn_raw_buffer_store_format ||
9609 IntrinsicID == Intrinsic::amdgcn_raw_ptr_buffer_store_format;
9616 VData = handleD16VData(VData, DAG);
9626 SDValue Rsrc = bufferRsrcPtrToVector(
Op.getOperand(3), DAG);
9627 auto Offsets = splitBufferOffsets(
Op.getOperand(4), DAG);
9647 return handleByteShortBufferStores(DAG, VDataVT,
DL, Ops, M);
9650 M->getMemoryVT(), M->getMemOperand());
9653 case Intrinsic::amdgcn_struct_buffer_store:
9654 case Intrinsic::amdgcn_struct_ptr_buffer_store:
9655 case Intrinsic::amdgcn_struct_buffer_store_format:
9656 case Intrinsic::amdgcn_struct_ptr_buffer_store_format: {
9657 const bool IsFormat =
9658 IntrinsicID == Intrinsic::amdgcn_struct_buffer_store_format ||
9659 IntrinsicID == Intrinsic::amdgcn_struct_ptr_buffer_store_format;
9667 VData = handleD16VData(VData, DAG);
9677 auto Rsrc = bufferRsrcPtrToVector(
Op.getOperand(3), DAG);
9678 auto Offsets = splitBufferOffsets(
Op.getOperand(5), DAG);
9699 return handleByteShortBufferStores(DAG, VDataType,
DL, Ops, M);
9702 M->getMemoryVT(), M->getMemOperand());
9704 case Intrinsic::amdgcn_raw_buffer_load_lds:
9705 case Intrinsic::amdgcn_raw_ptr_buffer_load_lds:
9706 case Intrinsic::amdgcn_struct_buffer_load_lds:
9707 case Intrinsic::amdgcn_struct_ptr_buffer_load_lds: {
9711 IntrinsicID == Intrinsic::amdgcn_struct_buffer_load_lds ||
9712 IntrinsicID == Intrinsic::amdgcn_struct_ptr_buffer_load_lds;
9713 unsigned OpOffset = HasVIndex ? 1 : 0;
9714 SDValue VOffset =
Op.getOperand(5 + OpOffset);
9716 unsigned Size =
Op->getConstantOperandVal(4);
9722 Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_UBYTE_LDS_BOTHEN
9723 : AMDGPU::BUFFER_LOAD_UBYTE_LDS_IDXEN
9724 : HasVOffset ? AMDGPU::BUFFER_LOAD_UBYTE_LDS_OFFEN
9725 : AMDGPU::BUFFER_LOAD_UBYTE_LDS_OFFSET;
9728 Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_USHORT_LDS_BOTHEN
9729 : AMDGPU::BUFFER_LOAD_USHORT_LDS_IDXEN
9730 : HasVOffset ? AMDGPU::BUFFER_LOAD_USHORT_LDS_OFFEN
9731 : AMDGPU::BUFFER_LOAD_USHORT_LDS_OFFSET;
9734 Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_DWORD_LDS_BOTHEN
9735 : AMDGPU::BUFFER_LOAD_DWORD_LDS_IDXEN
9736 : HasVOffset ? AMDGPU::BUFFER_LOAD_DWORD_LDS_OFFEN
9737 : AMDGPU::BUFFER_LOAD_DWORD_LDS_OFFSET;
9745 if (HasVIndex && HasVOffset)
9751 else if (HasVOffset)
9754 SDValue Rsrc = bufferRsrcPtrToVector(
Op.getOperand(2), DAG);
9758 unsigned Aux =
Op.getConstantOperandVal(8 + OpOffset);
9793 case Intrinsic::amdgcn_global_load_lds: {
9795 unsigned Size =
Op->getConstantOperandVal(4);
9800 Opc = AMDGPU::GLOBAL_LOAD_LDS_UBYTE;
9803 Opc = AMDGPU::GLOBAL_LOAD_LDS_USHORT;
9806 Opc = AMDGPU::GLOBAL_LOAD_LDS_DWORD;
9823 if (LHS->isDivergent())
9827 RHS.getOperand(0).getValueType() == MVT::i32) {
9835 if (!
Addr->isDivergent()) {
9851 LoadPtrI.
Offset =
Op->getConstantOperandVal(5);
9871 case Intrinsic::amdgcn_end_cf:
9873 Op->getOperand(2), Chain), 0);
9874 case Intrinsic::amdgcn_s_barrier_init:
9875 case Intrinsic::amdgcn_s_barrier_join:
9876 case Intrinsic::amdgcn_s_wakeup_barrier: {
9881 bool IsInlinableBarID =
false;
9889 if (IsInlinableBarID) {
9890 switch (IntrinsicID) {
9893 case Intrinsic::amdgcn_s_barrier_init:
9894 Opc = AMDGPU::S_BARRIER_INIT_IMM;
9896 case Intrinsic::amdgcn_s_barrier_join:
9897 Opc = AMDGPU::S_BARRIER_JOIN_IMM;
9899 case Intrinsic::amdgcn_s_wakeup_barrier:
9900 Opc = AMDGPU::S_WAKEUP_BARRIER_IMM;
9907 switch (IntrinsicID) {
9910 case Intrinsic::amdgcn_s_barrier_init:
9911 Opc = AMDGPU::S_BARRIER_INIT_M0;
9913 case Intrinsic::amdgcn_s_barrier_join:
9914 Opc = AMDGPU::S_BARRIER_JOIN_M0;
9916 case Intrinsic::amdgcn_s_wakeup_barrier:
9917 Opc = AMDGPU::S_WAKEUP_BARRIER_M0;
9922 if (IntrinsicID == Intrinsic::amdgcn_s_barrier_init) {
9928 if (!IsInlinableBarID) {
9933 Op.getOperand(2), M0Val),
9937 }
else if (!IsInlinableBarID) {
9947 return lowerImage(
Op, ImageDimIntr, DAG,
true);
9960std::pair<SDValue, SDValue> SITargetLowering::splitBufferOffsets(
9984 unsigned Overflow = ImmOffset & ~MaxImm;
9985 ImmOffset -= Overflow;
9986 if ((int32_t)Overflow < 0) {
9987 Overflow += ImmOffset;
9996 SDValue Ops[] = { N0, OverflowVal };
10011void SITargetLowering::setBufferOffsets(
SDValue CombinedOffset,
10013 Align Alignment)
const {
10019 if (
TII->splitMUBUFOffset(Imm, SOffset, ImmOffset, Alignment)) {
10032 TII->splitMUBUFOffset(
Offset, SOffset, ImmOffset, Alignment)) {
10049SDValue SITargetLowering::bufferRsrcPtrToVector(
SDValue MaybePointer,
10052 return MaybePointer;
10068 SDValue NumRecords =
Op->getOperand(3);
10071 auto [LowHalf, HighHalf] = DAG.
SplitScalar(Pointer, Loc, MVT::i32, MVT::i32);
10074 std::optional<uint32_t> ConstStride = std::nullopt;
10076 ConstStride = ConstNode->getZExtValue();
10079 if (!ConstStride || *ConstStride != 0) {
10082 ShiftedStride = DAG.
getConstant(*ConstStride << 16, Loc, MVT::i32);
10093 NewHighHalf, NumRecords, Flags);
10094 SDValue RsrcPtr = DAG.
getNode(ISD::BITCAST, Loc, MVT::i128, Rsrc);
10103 bool IsTFE)
const {
10113 SDValue Op = getMemIntrinsicNode(Opc,
DL, VTs, Ops, MVT::v2i32, OpMMO, DAG);
10130 LoadVal = DAG.
getNode(ISD::BITCAST,
DL, LoadVT, LoadVal);
10140 if (VDataType == MVT::f16 || VDataType == MVT::bf16)
10141 Ops[1] = DAG.
getNode(ISD::BITCAST,
DL, MVT::i16, Ops[1]);
10144 Ops[1] = BufferStoreExt;
10149 M->getMemOperand());
10174SDValue SITargetLowering::widenLoad(
LoadSDNode *Ld, DAGCombinerInfo &DCI)
const {
10190 if ((MemVT.
isSimple() && !DCI.isAfterLegalizeDAG()) ||
10197 "unexpected vector extload");
10210 "unexpected fp extload");
10228 DCI.AddToWorklist(Cvt.
getNode());
10233 DCI.AddToWorklist(Cvt.
getNode());
10236 Cvt = DAG.
getNode(ISD::BITCAST, SL, VT, Cvt);
10244 if (Info.isEntryFunction())
10245 return Info.getUserSGPRInfo().hasFlatScratchInit();
10253 EVT MemVT =
Load->getMemoryVT();
10266 EVT RealMemVT = (MemVT == MVT::i1) ? MVT::i8 :
MVT::i16;
10269 BasePtr, RealMemVT, MMO);
10299 assert(
Op.getValueType().getVectorElementType() == MVT::i32 &&
10300 "Custom lowering for non-i32 vectors hasn't been implemented.");
10303 unsigned AS =
Load->getAddressSpace();
10322 if (!
Op->isDivergent() && Alignment >=
Align(4) && NumElements < 32) {
10339 Alignment >=
Align(4) && NumElements < 32) {
10354 if (NumElements > 4)
10374 if (NumElements > 2)
10379 if (NumElements > 4)
10391 auto Flags =
Load->getMemOperand()->getFlags();
10393 Load->getAlign(), Flags, &
Fast) &&
10402 MemVT, *
Load->getMemOperand())) {
10412 EVT VT =
Op.getValueType();
10439 return DAG.
getNode(ISD::BITCAST,
DL, VT, Res);
10449 EVT VT =
Op.getValueType();
10452 bool AllowInaccurateRcp = Flags.hasApproximateFuncs() ||
10459 if (!AllowInaccurateRcp && VT != MVT::f16)
10462 if (CLHS->isExactlyValue(1.0)) {
10479 if (CLHS->isExactlyValue(-1.0)) {
10488 if (!AllowInaccurateRcp && (VT != MVT::f16 || !Flags.hasAllowReciprocal()))
10502 EVT VT =
Op.getValueType();
10505 bool AllowInaccurateDiv = Flags.hasApproximateFuncs() ||
10507 if (!AllowInaccurateDiv)
10528 return DAG.
getNode(Opcode, SL, VT,
A,
B, Flags);
10541 return DAG.
getNode(Opcode, SL, VTList,
10550 return DAG.
getNode(Opcode, SL, VT, {
A,
B,
C}, Flags);
10563 return DAG.
getNode(Opcode, SL, VTList,
10569 if (
SDValue FastLowered = lowerFastUnsafeFDIV(
Op, DAG))
10570 return FastLowered;
10576 SDValue CvtSrc0 = DAG.
getNode(ISD::FP_EXTEND, SL, MVT::f32, Src0);
10577 SDValue CvtSrc1 = DAG.
getNode(ISD::FP_EXTEND, SL, MVT::f32, Src1);
10597 const APFloat K0Val(0x1p+96f);
10600 const APFloat K1Val(0x1p-32f);
10627 assert(ST->hasDenormModeInst() &&
"Requires S_DENORM_MODE");
10628 uint32_t DPDenormModeDefault = Info->getMode().fpDenormModeDPValue();
10629 uint32_t Mode = SPDenormMode | (DPDenormModeDefault << 2);
10634 if (
SDValue FastLowered = lowerFastUnsafeFDIV(
Op, DAG))
10635 return FastLowered;
10642 Flags.setNoFPExcept(
true);
10653 {
RHS,
RHS, LHS}, Flags);
10655 {
LHS,
RHS, LHS}, Flags);
10659 DenominatorScaled, Flags);
10661 DenominatorScaled, Flags);
10663 using namespace AMDGPU::Hwreg;
10664 const unsigned Denorm32Reg = HwregEncoding::encode(ID_MODE, 4, 2);
10669 const DenormalMode DenormMode = Info->getMode().FP32Denormals;
10672 const bool HasDynamicDenormals =
10678 if (!PreservesDenormals) {
10686 if (HasDynamicDenormals) {
10690 SavedDenormMode =
SDValue(GetReg, 0);
10698 const SDValue EnableDenormValue =
10707 EnableDenorm = DAG.
getMachineNode(AMDGPU::S_SETREG_B32, SL, BindParamVTs,
10708 {EnableDenormValue,
BitField, Glue});
10721 ApproxRcp, One, NegDivScale0, Flags);
10724 ApproxRcp, Fma0, Flags);
10727 Fma1, Fma1, Flags);
10730 NumeratorScaled,
Mul, Flags);
10733 Fma2, Fma1,
Mul, Fma2, Flags);
10736 NumeratorScaled, Fma3, Flags);
10738 if (!PreservesDenormals) {
10745 Fma4.
getValue(1), DisableDenormValue,
10748 assert(HasDynamicDenormals == (
bool)SavedDenormMode);
10749 const SDValue DisableDenormValue =
10750 HasDynamicDenormals
10755 AMDGPU::S_SETREG_B32, SL, MVT::Other,
10766 {Fma4, Fma1, Fma3, Scale}, Flags);
10772 if (
SDValue FastLowered = lowerFastUnsafeFDIV64(
Op, DAG))
10773 return FastLowered;
10785 SDValue NegDivScale0 = DAG.
getNode(ISD::FNEG, SL, MVT::f64, DivScale0);
10801 NegDivScale0,
Mul, DivScale1);
10814 SDValue Scale0BC = DAG.
getNode(ISD::BITCAST, SL, MVT::v2i32, DivScale0);
10815 SDValue Scale1BC = DAG.
getNode(ISD::BITCAST, SL, MVT::v2i32, DivScale1);
10833 Fma4, Fma3,
Mul, Scale);
10839 EVT VT =
Op.getValueType();
10841 if (VT == MVT::f32)
10842 return LowerFDIV32(
Op, DAG);
10844 if (VT == MVT::f64)
10845 return LowerFDIV64(
Op, DAG);
10847 if (VT == MVT::f16)
10848 return LowerFDIV16(
Op, DAG);
10857 EVT ResultExpVT =
Op->getValueType(1);
10858 EVT InstrExpVT = VT == MVT::f16 ? MVT::i16 : MVT::i32;
10888 if (VT == MVT::i1) {
10891 Store->getBasePtr(), MVT::i1,
Store->getMemOperand());
10895 Store->getValue().getValueType().getScalarType() == MVT::i32);
10897 unsigned AS =
Store->getAddressSpace();
10916 if (NumElements > 4)
10923 VT, *
Store->getMemOperand()))
10933 if (NumElements > 2)
10937 if (NumElements > 4 ||
10946 auto Flags =
Store->getMemOperand()->getFlags();
10968 DAG.
getNode(ISD::FP_EXTEND, SL, MVT::f32,
Op.getOperand(0), Flags);
10981 MVT VT =
Op.getValueType().getSimpleVT();
11010 SDValue SqrtSNextDown = DAG.
getNode(ISD::BITCAST,
DL, VT, SqrtSNextDownInt);
11013 DAG.
getNode(ISD::FNEG,
DL, VT, SqrtSNextDown, Flags);
11022 SDValue NegSqrtSNextUp = DAG.
getNode(ISD::FNEG,
DL, VT, SqrtSNextUp, Flags);
11133 SqrtRet = DAG.
getNode(ISD::FLDEXP,
DL, MVT::f64, SqrtRet, ScaleDown, Flags);
11150 EVT VT =
Op.getValueType();
11156 auto Flags =
Op->getFlags();
11167 switch (
Op.getOpcode()) {
11193 EVT VT =
Op.getValueType();
11209 DAGCombinerInfo &DCI)
const {
11210 EVT VT =
N->getValueType(0);
11212 if (ScalarVT != MVT::f32 && ScalarVT != MVT::f16)
11219 EVT SrcVT = Src.getValueType();
11225 if (DCI.isAfterLegalizeDAG() && SrcVT == MVT::i32) {
11228 DCI.AddToWorklist(Cvt.
getNode());
11231 if (ScalarVT != MVT::f32) {
11243 DAGCombinerInfo &DCI)
const {
11253 SDValue MagAsVector = DAG.
getNode(ISD::BITCAST,
DL, MVT::v2f32, MagnitudeOp);
11303 unsigned AddrSpace,
11305 DAGCombinerInfo &DCI)
const {
11335 AM.HasBaseReg =
true;
11336 AM.BaseOffs =
Offset.getSExtValue();
11341 EVT VT =
N->getValueType(0);
11358 switch (
N->getOpcode()) {
11369 DAGCombinerInfo &DCI)
const {
11378 SDValue NewPtr = performSHLPtrCombine(
Ptr.getNode(),
N->getAddressSpace(),
11379 N->getMemoryVT(), DCI);
11383 NewOps[PtrIdx] = NewPtr;
11392 return (Opc ==
ISD::AND && (Val == 0 || Val == 0xffffffff)) ||
11393 (Opc ==
ISD::OR && (Val == 0xffffffff || Val == 0)) ||
11402SDValue SITargetLowering::splitBinaryBitConstantOp(
11403 DAGCombinerInfo &DCI,
11425 if (V.getValueType() != MVT::i1)
11427 switch (V.getOpcode()) {
11446 if (!(
C & 0x000000ff)) ZeroByteMask |= 0x000000ff;
11447 if (!(
C & 0x0000ff00)) ZeroByteMask |= 0x0000ff00;
11448 if (!(
C & 0x00ff0000)) ZeroByteMask |= 0x00ff0000;
11449 if (!(
C & 0xff000000)) ZeroByteMask |= 0xff000000;
11450 uint32_t NonZeroByteMask = ~ZeroByteMask;
11451 if ((NonZeroByteMask &
C) != NonZeroByteMask)
11464 assert(V.getValueSizeInBits() == 32);
11466 if (V.getNumOperands() != 2)
11475 switch (V.getOpcode()) {
11480 return (0x03020100 & ConstMask) | (0x0c0c0c0c & ~ConstMask);
11485 return (0x03020100 & ~ConstMask) | ConstMask;
11492 return uint32_t((0x030201000c0c0c0cull <<
C) >> 32);
11498 return uint32_t(0x0c0c0c0c03020100ull >>
C);
11505 DAGCombinerInfo &DCI)
const {
11506 if (DCI.isBeforeLegalize())
11510 EVT VT =
N->getValueType(0);
11516 if (VT == MVT::i64 && CRHS) {
11522 if (CRHS && VT == MVT::i32) {
11532 unsigned Shift = CShift->getZExtValue();
11534 unsigned Offset = NB + Shift;
11535 if ((
Offset & (Bits - 1)) == 0) {
11538 LHS->getOperand(0),
11559 Sel = (LHS.getConstantOperandVal(2) & Sel) | (~Sel & 0x0c0c0c0c);
11574 if (
Y.getOpcode() != ISD::FABS ||
Y.getOperand(0) !=
X ||
11579 if (
X != LHS.getOperand(1))
11617 (RHS.getOperand(0) == LHS.getOperand(0) &&
11618 LHS.getOperand(0) == LHS.getOperand(1))) {
11621 Mask->getZExtValue() & ~OrdMask :
11622 Mask->getZExtValue() & OrdMask;
11630 if (VT == MVT::i32 &&
11642 if (VT == MVT::i32 && LHS.hasOneUse() && RHS.hasOneUse() &&
11643 N->isDivergent() &&
TII->pseudoToMCOpcode(AMDGPU::V_PERM_B32_e64) != -1) {
11646 if (LHSMask != ~0u && RHSMask != ~0u) {
11649 if (LHSMask > RHSMask) {
11656 uint32_t LHSUsedLanes = ~(LHSMask & 0x0c0c0c0c) & 0x0c0c0c0c;
11657 uint32_t RHSUsedLanes = ~(RHSMask & 0x0c0c0c0c) & 0x0c0c0c0c;
11660 if (!(LHSUsedLanes & RHSUsedLanes) &&
11663 !(LHSUsedLanes == 0x0c0c0000 && RHSUsedLanes == 0x00000c0c)) {
11670 for (
unsigned I = 0;
I < 32;
I += 8) {
11672 if ((LHSMask & ByteSel) == 0x0c || (RHSMask & ByteSel) == 0x0c)
11673 Mask &= (0x0c <<
I) & 0xffffffff;
11682 LHS.getOperand(0), RHS.getOperand(0),
11731static const std::optional<ByteProvider<SDValue>>
11733 unsigned Depth = 0) {
11736 return std::nullopt;
11738 if (
Op.getValueSizeInBits() < 8)
11739 return std::nullopt;
11741 if (
Op.getValueType().isVector())
11744 switch (
Op->getOpcode()) {
11756 NarrowVT = VTSign->getVT();
11759 return std::nullopt;
11762 if (SrcIndex >= NarrowByteWidth)
11763 return std::nullopt;
11771 return std::nullopt;
11773 uint64_t BitShift = ShiftOp->getZExtValue();
11775 if (BitShift % 8 != 0)
11776 return std::nullopt;
11778 SrcIndex += BitShift / 8;
11796static const std::optional<ByteProvider<SDValue>>
11798 unsigned StartingIndex = 0) {
11802 return std::nullopt;
11804 unsigned BitWidth =
Op.getScalarValueSizeInBits();
11806 return std::nullopt;
11808 return std::nullopt;
11810 bool IsVec =
Op.getValueType().isVector();
11811 switch (
Op.getOpcode()) {
11814 return std::nullopt;
11819 return std::nullopt;
11823 return std::nullopt;
11826 if (!LHS->isConstantZero() && !RHS->isConstantZero())
11827 return std::nullopt;
11828 if (!LHS || LHS->isConstantZero())
11830 if (!RHS || RHS->isConstantZero())
11832 return std::nullopt;
11837 return std::nullopt;
11841 return std::nullopt;
11843 uint32_t BitMask = BitMaskOp->getZExtValue();
11847 if ((IndexMask & BitMask) != IndexMask) {
11850 if (IndexMask & BitMask)
11851 return std::nullopt;
11860 return std::nullopt;
11864 if (!ShiftOp ||
Op.getValueType().isVector())
11865 return std::nullopt;
11867 uint64_t BitsProvided =
Op.getValueSizeInBits();
11868 if (BitsProvided % 8 != 0)
11869 return std::nullopt;
11871 uint64_t BitShift = ShiftOp->getAPIntValue().urem(BitsProvided);
11873 return std::nullopt;
11875 uint64_t ConcatSizeInBytes = BitsProvided / 4;
11876 uint64_t ByteShift = BitShift / 8;
11878 uint64_t NewIndex = (
Index + ByteShift) % ConcatSizeInBytes;
11879 uint64_t BytesProvided = BitsProvided / 8;
11880 SDValue NextOp =
Op.getOperand(NewIndex >= BytesProvided ? 0 : 1);
11881 NewIndex %= BytesProvided;
11888 return std::nullopt;
11892 return std::nullopt;
11894 uint64_t BitShift = ShiftOp->getZExtValue();
11896 return std::nullopt;
11898 auto BitsProvided =
Op.getScalarValueSizeInBits();
11899 if (BitsProvided % 8 != 0)
11900 return std::nullopt;
11902 uint64_t BytesProvided = BitsProvided / 8;
11903 uint64_t ByteShift = BitShift / 8;
11908 return BytesProvided - ByteShift >
Index
11916 return std::nullopt;
11920 return std::nullopt;
11922 uint64_t BitShift = ShiftOp->getZExtValue();
11923 if (BitShift % 8 != 0)
11924 return std::nullopt;
11925 uint64_t ByteShift = BitShift / 8;
11931 return Index < ByteShift
11934 Depth + 1, StartingIndex);
11943 return std::nullopt;
11951 NarrowBitWidth = VTSign->getVT().getSizeInBits();
11953 if (NarrowBitWidth % 8 != 0)
11954 return std::nullopt;
11955 uint64_t NarrowByteWidth = NarrowBitWidth / 8;
11957 if (
Index >= NarrowByteWidth)
11959 ? std::optional<ByteProvider<SDValue>>(
11967 return std::nullopt;
11971 if (NarrowByteWidth >=
Index) {
11976 return std::nullopt;
11983 return std::nullopt;
11989 unsigned NarrowBitWidth = L->getMemoryVT().getSizeInBits();
11990 if (NarrowBitWidth % 8 != 0)
11991 return std::nullopt;
11992 uint64_t NarrowByteWidth = NarrowBitWidth / 8;
11997 if (
Index >= NarrowByteWidth) {
11999 ? std::optional<ByteProvider<SDValue>>(
12004 if (NarrowByteWidth >
Index) {
12008 return std::nullopt;
12013 return std::nullopt;
12016 Depth + 1, StartingIndex);
12022 return std::nullopt;
12023 auto VecIdx = IdxOp->getZExtValue();
12024 auto ScalarSize =
Op.getScalarValueSizeInBits();
12025 if (ScalarSize < 32)
12026 Index = ScalarSize == 8 ? VecIdx : VecIdx * 2 +
Index;
12028 StartingIndex,
Index);
12033 return std::nullopt;
12037 return std::nullopt;
12040 (PermMask->getZExtValue() & (0xFF << (
Index * 8))) >> (
Index * 8);
12041 if (IdxMask > 0x07 && IdxMask != 0x0c)
12042 return std::nullopt;
12044 auto NextOp =
Op.getOperand(IdxMask > 0x03 ? 0 : 1);
12045 auto NextIndex = IdxMask > 0x03 ? IdxMask % 4 : IdxMask;
12047 return IdxMask != 0x0c ?
calculateSrcByte(NextOp, StartingIndex, NextIndex)
12053 return std::nullopt;
12068 return !OpVT.
isVector() && OpVT.getSizeInBits() == 16;
12075 auto MemVT = L->getMemoryVT();
12078 return L->getMemoryVT().getSizeInBits() == 16;
12088 int Low8 = Mask & 0xff;
12089 int Hi8 = (Mask & 0xff00) >> 8;
12091 assert(Low8 < 8 && Hi8 < 8);
12093 bool IsConsecutive = (Hi8 - Low8 == 1);
12098 bool Is16Aligned = !(Low8 % 2);
12100 return IsConsecutive && Is16Aligned;
12108 int Low16 = PermMask & 0xffff;
12109 int Hi16 = (PermMask & 0xffff0000) >> 16;
12119 auto OtherOpIs16Bit = TempOtherOp.getValueSizeInBits() == 16 ||
12121 if (!OtherOpIs16Bit)
12129 unsigned DWordOffset) {
12132 auto TypeSize = Src.getValueSizeInBits().getFixedValue();
12134 assert(Src.getValueSizeInBits().isKnownMultipleOf(8));
12139 if (Src.getValueType().isVector()) {
12140 auto ScalarTySize = Src.getScalarValueSizeInBits();
12141 auto ScalarTy = Src.getValueType().getScalarType();
12142 if (ScalarTySize == 32) {
12146 if (ScalarTySize > 32) {
12149 DAG.
getConstant(DWordOffset / (ScalarTySize / 32), SL, MVT::i32));
12150 auto ShiftVal = 32 * (DWordOffset % (ScalarTySize / 32));
12157 assert(ScalarTySize < 32);
12158 auto NumElements =
TypeSize / ScalarTySize;
12159 auto Trunc32Elements = (ScalarTySize * NumElements) / 32;
12160 auto NormalizedTrunc = Trunc32Elements * 32 / ScalarTySize;
12161 auto NumElementsIn32 = 32 / ScalarTySize;
12162 auto NumAvailElements = DWordOffset < Trunc32Elements
12164 : NumElements - NormalizedTrunc;
12177 auto ShiftVal = 32 * DWordOffset;
12185 [[maybe_unused]]
EVT VT =
N->getValueType(0);
12190 for (
int i = 0; i < 4; i++) {
12192 std::optional<ByteProvider<SDValue>>
P =
12195 if (!
P ||
P->isConstantZero())
12200 if (PermNodes.
size() != 4)
12203 std::pair<unsigned, unsigned> FirstSrc(0, PermNodes[0].SrcOffset / 4);
12204 std::optional<std::pair<unsigned, unsigned>> SecondSrc;
12206 for (
size_t i = 0; i < PermNodes.
size(); i++) {
12207 auto PermOp = PermNodes[i];
12210 int SrcByteAdjust = 4;
12214 if (!PermOp.hasSameSrc(PermNodes[FirstSrc.first]) ||
12215 ((PermOp.SrcOffset / 4) != FirstSrc.second)) {
12217 if (!PermOp.hasSameSrc(PermNodes[SecondSrc->first]) ||
12218 ((PermOp.SrcOffset / 4) != SecondSrc->second))
12222 SecondSrc = {i, PermNodes[i].SrcOffset / 4};
12223 assert(!(PermNodes[SecondSrc->first].Src->getValueSizeInBits() % 8));
12226 assert((PermOp.SrcOffset % 4) + SrcByteAdjust < 8);
12228 PermMask |= ((PermOp.SrcOffset % 4) + SrcByteAdjust) << (i * 8);
12231 SDValue Op = *PermNodes[FirstSrc.first].Src;
12233 assert(
Op.getValueSizeInBits() == 32);
12237 int Low16 = PermMask & 0xffff;
12238 int Hi16 = (PermMask & 0xffff0000) >> 16;
12240 bool WellFormedLow = (Low16 == 0x0504) || (Low16 == 0x0100);
12241 bool WellFormedHi = (Hi16 == 0x0706) || (Hi16 == 0x0302);
12244 if (WellFormedLow && WellFormedHi)
12248 SDValue OtherOp = SecondSrc ? *PermNodes[SecondSrc->first].Src :
Op;
12257 assert(
Op.getValueType().isByteSized() &&
12275 DAGCombinerInfo &DCI)
const {
12280 EVT VT =
N->getValueType(0);
12281 if (VT == MVT::i1) {
12285 SDValue Src = LHS.getOperand(0);
12286 if (Src != RHS.getOperand(0))
12291 if (!CLHS || !CRHS)
12295 static const uint32_t MaxMask = 0x3ff;
12314 Sel |= LHS.getConstantOperandVal(2);
12322 if (VT == MVT::i32 && LHS.hasOneUse() && RHS.hasOneUse() &&
12323 N->isDivergent() &&
TII->pseudoToMCOpcode(AMDGPU::V_PERM_B32_e64) != -1) {
12327 auto usesCombinedOperand = [](
SDNode *OrUse) {
12329 if (OrUse->getOpcode() != ISD::BITCAST ||
12330 !OrUse->getValueType(0).isVector())
12334 for (
auto VUse : OrUse->uses()) {
12335 if (!VUse->getValueType(0).isVector())
12342 if (VUse->getOpcode() == VectorwiseOp)
12348 if (!
any_of(
N->uses(), usesCombinedOperand))
12354 if (LHSMask != ~0u && RHSMask != ~0u) {
12357 if (LHSMask > RHSMask) {
12364 uint32_t LHSUsedLanes = ~(LHSMask & 0x0c0c0c0c) & 0x0c0c0c0c;
12365 uint32_t RHSUsedLanes = ~(RHSMask & 0x0c0c0c0c) & 0x0c0c0c0c;
12368 if (!(LHSUsedLanes & RHSUsedLanes) &&
12371 !(LHSUsedLanes == 0x0c0c0000 && RHSUsedLanes == 0x00000c0c)) {
12373 LHSMask &= ~RHSUsedLanes;
12374 RHSMask &= ~LHSUsedLanes;
12376 LHSMask |= LHSUsedLanes & 0x04040404;
12382 LHS.getOperand(0), RHS.getOperand(0),
12386 if (LHSMask == ~0u || RHSMask == ~0u) {
12392 if (VT != MVT::i64 || DCI.isBeforeLegalizeOps())
12407 if (SrcVT == MVT::i32) {
12413 DCI.AddToWorklist(LowOr.
getNode());
12414 DCI.AddToWorklist(HiBits.
getNode());
12418 return DAG.
getNode(ISD::BITCAST, SL, MVT::i64, Vec);
12426 N->getOperand(0), CRHS))
12434 DAGCombinerInfo &DCI)
const {
12435 if (
SDValue RV = reassociateScalarOps(
N, DCI.DAG))
12444 EVT VT =
N->getValueType(0);
12445 if (CRHS && VT == MVT::i64) {
12453 if (LHS.getOpcode() ==
ISD::SELECT && VT == MVT::i32) {
12461 DAG.
getNode(ISD::BITCAST,
DL, MVT::f32, LHS->getOperand(1));
12463 DAG.
getNode(ISD::BITCAST,
DL, MVT::f32, LHS->getOperand(2));
12467 LHS->getOperand(0), FNegLHS, FNegRHS);
12468 return DAG.
getNode(ISD::BITCAST,
DL, VT, NewSelect);
12476 DAGCombinerInfo &DCI)
const {
12481 EVT VT =
N->getValueType(0);
12482 if (VT != MVT::i32)
12486 if (Src.getValueType() != MVT::i16)
12493SITargetLowering::performSignExtendInRegCombine(
SDNode *
N,
12494 DAGCombinerInfo &DCI)
const {
12501 VTSign->getVT() == MVT::i8) ||
12503 VTSign->getVT() == MVT::i16))) {
12505 "s_buffer_load_{u8, i8} are supported "
12506 "in GFX12 (or newer) architectures.");
12507 EVT VT = Src.getValueType();
12512 SDVTList ResList = DCI.DAG.getVTList(MVT::i32);
12519 SDValue BufferLoad = DCI.DAG.getMemIntrinsicNode(
12520 Opc,
DL, ResList, Ops, M->getMemoryVT(), M->getMemOperand());
12525 VTSign->getVT() == MVT::i8) ||
12527 VTSign->getVT() == MVT::i16)) &&
12541 SDVTList ResList = DCI.DAG.getVTList(MVT::i32,
12542 Src.getOperand(0).getValueType());
12545 SDValue BufferLoadSignExt = DCI.DAG.getMemIntrinsicNode(Opc,
SDLoc(
N),
12547 Ops, M->getMemoryVT(),
12548 M->getMemOperand());
12549 return DCI.DAG.getMergeValues({BufferLoadSignExt,
12556 DAGCombinerInfo &DCI)
const {
12564 if (
N->getOperand(0).isUndef())
12571 DAGCombinerInfo &DCI)
const {
12572 EVT VT =
N->getValueType(0);
12576 return DCI.DAG.getConstantFP(
12588 if ((VT == MVT::f16 && N0.
getOpcode() == ISD::FSQRT) &&
12599 unsigned Opcode =
Op.getOpcode();
12604 const auto &
F = CFP->getValueAPF();
12605 if (
F.isNaN() &&
F.isSignaling())
12607 if (!
F.isDenormal())
12633 case ISD::FP_EXTEND:
12634 case ISD::FP16_TO_FP:
12635 case ISD::FP_TO_FP16:
12636 case ISD::BF16_TO_FP:
12637 case ISD::FP_TO_BF16:
12670 if (
Op.getValueType() == MVT::i32) {
12676 if (RHS->getZExtValue() == 0xffff0000) {
12686 return Op.getValueType().getScalarType() != MVT::f16;
12690 case ISD::FMINNUM_IEEE:
12691 case ISD::FMAXNUM_IEEE:
12692 case ISD::FMINIMUM:
12693 case ISD::FMAXIMUM:
12754 if (
Op.getValueType() == MVT::i16) {
12757 TruncSrc.
getOpcode() == ISD::BITCAST &&
12765 unsigned IntrinsicID =
Op.getConstantOperandVal(0);
12767 switch (IntrinsicID) {
12768 case Intrinsic::amdgcn_cvt_pkrtz:
12769 case Intrinsic::amdgcn_cubeid:
12770 case Intrinsic::amdgcn_frexp_mant:
12771 case Intrinsic::amdgcn_fdot2:
12772 case Intrinsic::amdgcn_rcp:
12773 case Intrinsic::amdgcn_rsq:
12774 case Intrinsic::amdgcn_rsq_clamp:
12775 case Intrinsic::amdgcn_rcp_legacy:
12776 case Intrinsic::amdgcn_rsq_legacy:
12777 case Intrinsic::amdgcn_trig_preop:
12778 case Intrinsic::amdgcn_log:
12779 case Intrinsic::amdgcn_exp2:
12780 case Intrinsic::amdgcn_sqrt:
12801 unsigned Opcode =
MI->getOpcode();
12803 if (Opcode == AMDGPU::G_FCANONICALIZE)
12806 std::optional<FPValueAndVReg> FCR;
12809 if (FCR->Value.isSignaling())
12811 if (!FCR->Value.isDenormal())
12822 case AMDGPU::G_FADD:
12823 case AMDGPU::G_FSUB:
12824 case AMDGPU::G_FMUL:
12825 case AMDGPU::G_FCEIL:
12826 case AMDGPU::G_FFLOOR:
12827 case AMDGPU::G_FRINT:
12828 case AMDGPU::G_FNEARBYINT:
12829 case AMDGPU::G_INTRINSIC_FPTRUNC_ROUND:
12830 case AMDGPU::G_INTRINSIC_TRUNC:
12831 case AMDGPU::G_INTRINSIC_ROUNDEVEN:
12832 case AMDGPU::G_FMA:
12833 case AMDGPU::G_FMAD:
12834 case AMDGPU::G_FSQRT:
12835 case AMDGPU::G_FDIV:
12836 case AMDGPU::G_FREM:
12837 case AMDGPU::G_FPOW:
12838 case AMDGPU::G_FPEXT:
12839 case AMDGPU::G_FLOG:
12840 case AMDGPU::G_FLOG2:
12841 case AMDGPU::G_FLOG10:
12842 case AMDGPU::G_FPTRUNC:
12843 case AMDGPU::G_AMDGPU_RCP_IFLAG:
12844 case AMDGPU::G_AMDGPU_CVT_F32_UBYTE0:
12845 case AMDGPU::G_AMDGPU_CVT_F32_UBYTE1:
12846 case AMDGPU::G_AMDGPU_CVT_F32_UBYTE2:
12847 case AMDGPU::G_AMDGPU_CVT_F32_UBYTE3:
12849 case AMDGPU::G_FNEG:
12850 case AMDGPU::G_FABS:
12851 case AMDGPU::G_FCOPYSIGN:
12853 case AMDGPU::G_FMINNUM:
12854 case AMDGPU::G_FMAXNUM:
12855 case AMDGPU::G_FMINNUM_IEEE:
12856 case AMDGPU::G_FMAXNUM_IEEE:
12857 case AMDGPU::G_FMINIMUM:
12858 case AMDGPU::G_FMAXIMUM: {
12866 case AMDGPU::G_BUILD_VECTOR:
12871 case AMDGPU::G_INTRINSIC:
12872 case AMDGPU::G_INTRINSIC_CONVERGENT:
12874 case Intrinsic::amdgcn_fmul_legacy:
12875 case Intrinsic::amdgcn_fmad_ftz:
12876 case Intrinsic::amdgcn_sqrt:
12877 case Intrinsic::amdgcn_fmed3:
12878 case Intrinsic::amdgcn_sin:
12879 case Intrinsic::amdgcn_cos:
12880 case Intrinsic::amdgcn_log:
12881 case Intrinsic::amdgcn_exp2:
12882 case Intrinsic::amdgcn_log_clamp:
12883 case Intrinsic::amdgcn_rcp:
12884 case Intrinsic::amdgcn_rcp_legacy:
12885 case Intrinsic::amdgcn_rsq:
12886 case Intrinsic::amdgcn_rsq_clamp:
12887 case Intrinsic::amdgcn_rsq_legacy:
12888 case Intrinsic::amdgcn_div_scale:
12889 case Intrinsic::amdgcn_div_fmas:
12890 case Intrinsic::amdgcn_div_fixup:
12891 case Intrinsic::amdgcn_fract:
12892 case Intrinsic::amdgcn_cvt_pkrtz:
12893 case Intrinsic::amdgcn_cubeid:
12894 case Intrinsic::amdgcn_cubema:
12895 case Intrinsic::amdgcn_cubesc:
12896 case Intrinsic::amdgcn_cubetc:
12897 case Intrinsic::amdgcn_frexp_mant:
12898 case Intrinsic::amdgcn_fdot2:
12899 case Intrinsic::amdgcn_trig_preop:
12914SDValue SITargetLowering::getCanonicalConstantFP(
12917 if (
C.isDenormal()) {
12931 if (
C.isSignaling()) {
12953SDValue SITargetLowering::performFCanonicalizeCombine(
12955 DAGCombinerInfo &DCI)
const {
12958 EVT VT =
N->getValueType(0);
12967 EVT VT =
N->getValueType(0);
12968 return getCanonicalConstantFP(DAG,
SDLoc(
N), VT, CFP->getValueAPF());
12984 EVT EltVT =
Lo.getValueType();
12987 for (
unsigned I = 0;
I != 2; ++
I) {
12990 NewElts[
I] = getCanonicalConstantFP(DAG, SL, EltVT,
12991 CFP->getValueAPF());
12992 }
else if (
Op.isUndef()) {
13024 case ISD::FMAXNUM_IEEE:
13026 case ISD::FMAXIMUM:
13033 case ISD::FMINNUM_IEEE:
13035 case ISD::FMINIMUM:
13061 if (!MinK || !MaxK)
13074 if (VT == MVT::i32 || (VT == MVT::i16 && Subtarget->
hasMed3_16()))
13075 return DAG.
getNode(Med3Opc, SL, VT, Src, MaxVal, MinVal);
13117 if (Info->getMode().DX10Clamp) {
13126 if (VT == MVT::f32 || (VT == MVT::f16 && Subtarget->
hasMed3_16())) {
13154 case ISD::FMINNUM_IEEE:
13155 case ISD::FMAXNUM_IEEE:
13158 return (VT == MVT::f32) || (VT == MVT::f16 && Subtarget.
hasMin3Max3_16());
13159 case ISD::FMINIMUM:
13160 case ISD::FMAXIMUM:
13161 return (VT == MVT::f32 || VT == MVT::f16) && Subtarget.
hasIEEEMinMax3();
13166 return (VT == MVT::i32) || (VT == MVT::i16 && Subtarget.
hasMin3Max3_16());
13175 DAGCombinerInfo &DCI)
const {
13178 EVT VT =
N->getValueType(0);
13179 unsigned Opc =
N->getOpcode();
13193 N->getValueType(0),
13206 N->getValueType(0),
13216 if (
SDValue Med3 = performIntMed3ImmCombine(
13221 if (
SDValue Med3 = performIntMed3ImmCombine(
13227 if (
SDValue Med3 = performIntMed3ImmCombine(
13232 if (
SDValue Med3 = performIntMed3ImmCombine(
13238 if (((Opc == ISD::FMINNUM && Op0.
getOpcode() == ISD::FMAXNUM) ||
13239 (Opc == ISD::FMINNUM_IEEE && Op0.
getOpcode() == ISD::FMAXNUM_IEEE) ||
13242 (VT == MVT::f32 || VT == MVT::f64 ||
13246 if (
SDValue Res = performFPMed3ImmCombine(DAG,
SDLoc(
N), Op0, Op1))
13257 return (CA->isExactlyValue(0.0) && CB->isExactlyValue(1.0)) ||
13258 (CA->isExactlyValue(1.0) && CB->isExactlyValue(0.0));
13267 DAGCombinerInfo &DCI)
const {
13268 EVT VT =
N->getValueType(0);
13291 if (Info->getMode().DX10Clamp) {
13311 DAGCombinerInfo &DCI)
const {
13315 return DCI.DAG.getUNDEF(
N->getValueType(0));
13323 bool IsDivergentIdx,
13328 unsigned VecSize = EltSize * NumElem;
13331 if (VecSize <= 64 && EltSize < 32)
13340 if (IsDivergentIdx)
13344 unsigned NumInsts = NumElem +
13345 ((EltSize + 31) / 32) * NumElem ;
13350 return NumInsts <= 16;
13354 return NumInsts <= 15;
13358 SDValue Idx =
N->getOperand(
N->getNumOperands() - 1);
13372SDValue SITargetLowering::performExtractVectorEltCombine(
13373 SDNode *
N, DAGCombinerInfo &DCI)
const {
13379 EVT ResVT =
N->getValueType(0);
13398 if (Vec.
hasOneUse() && DCI.isBeforeLegalize() && VecEltVT == ResVT) {
13417 case ISD::FMAXNUM_IEEE:
13418 case ISD::FMINNUM_IEEE:
13419 case ISD::FMAXIMUM:
13420 case ISD::FMINIMUM: {
13426 DCI.AddToWorklist(Elt0.
getNode());
13427 DCI.AddToWorklist(Elt1.
getNode());
13449 if (!DCI.isBeforeLegalize())
13457 VecSize > 32 && VecSize % 32 == 0 && Idx) {
13460 unsigned BitIndex = Idx->getZExtValue() * VecEltSize;
13461 unsigned EltIdx = BitIndex / 32;
13462 unsigned LeftoverBitIdx = BitIndex % 32;
13466 DCI.AddToWorklist(Cast.
getNode());
13470 DCI.AddToWorklist(Elt.
getNode());
13473 DCI.AddToWorklist(Srl.
getNode());
13477 DCI.AddToWorklist(Trunc.
getNode());
13479 if (VecEltVT == ResVT) {
13480 return DAG.
getNode(ISD::BITCAST, SL, VecEltVT, Trunc);
13491SITargetLowering::performInsertVectorEltCombine(
SDNode *
N,
13492 DAGCombinerInfo &DCI)
const {
13522 if (Src.getOpcode() == ISD::FP_EXTEND &&
13523 Src.getOperand(0).getValueType() == MVT::f16) {
13524 return Src.getOperand(0);
13528 APFloat Val = CFP->getValueAPF();
13529 bool LosesInfo =
true;
13539 DAGCombinerInfo &DCI)
const {
13541 "combine only useful on gfx8");
13544 EVT VT =
N->getValueType(0);
13545 if (VT != MVT::f16)
13580 return DAG.
getNode(ISD::FMINNUM_IEEE, SL, VT, B1, C1);
13583unsigned SITargetLowering::getFusedOpcode(
const SelectionDAG &DAG,
13585 const SDNode *N1)
const {
13590 if (((VT == MVT::f32 &&
13592 (VT == MVT::f16 && Subtarget->
hasMadF16() &&
13612 EVT VT =
N->getValueType(0);
13613 if (VT != MVT::i32 && VT != MVT::i64)
13619 unsigned Opc =
N->getOpcode();
13642 return DAG.
getNode(Opc, SL, VT, Add1, Op2);
13664 DAGCombinerInfo &DCI)
const {
13668 EVT VT =
N->getValueType(0);
13678 if (!
N->isDivergent() && Subtarget->
hasSMulHi())
13682 if (NumBits <= 32 || NumBits > 64)
13685 if (LHS.getOpcode() !=
ISD::MUL) {
13694 unsigned NumUsers = 0;
13719 bool MulSignedLo =
false;
13720 if (!MulLHSUnsigned32 || !MulRHSUnsigned32) {
13729 if (VT != MVT::i64) {
13752 getMad64_32(DAG, SL, MVT::i64, MulLHSLo, MulRHSLo, AddRHS, MulSignedLo);
13754 if (!MulSignedLo && (!MulLHSUnsigned32 || !MulRHSUnsigned32)) {
13756 std::tie(AccumLo, AccumHi) = DAG.
SplitScalar(Accum, SL, MVT::i32, MVT::i32);
13758 if (!MulLHSUnsigned32) {
13765 if (!MulRHSUnsigned32) {
13776 if (VT != MVT::i64)
13783static std::optional<ByteProvider<SDValue>>
13786 if (!Byte0 || Byte0->isConstantZero()) {
13787 return std::nullopt;
13790 if (Byte1 && !Byte1->isConstantZero()) {
13791 return std::nullopt;
13797 unsigned FirstCs =
First & 0x0c0c0c0c;
13798 unsigned SecondCs = Second & 0x0c0c0c0c;
13799 unsigned FirstNoCs =
First & ~0x0c0c0c0c;
13800 unsigned SecondNoCs = Second & ~0x0c0c0c0c;
13802 assert((FirstCs & 0xFF) | (SecondCs & 0xFF));
13803 assert((FirstCs & 0xFF00) | (SecondCs & 0xFF00));
13804 assert((FirstCs & 0xFF0000) | (SecondCs & 0xFF0000));
13805 assert((FirstCs & 0xFF000000) | (SecondCs & 0xFF000000));
13807 return (FirstNoCs | SecondNoCs) | (FirstCs & SecondCs);
13831 for (
int BPI = 0; BPI < 2; BPI++) {
13834 BPP = {Src1, Src0};
13836 unsigned ZeroMask = 0x0c0c0c0c;
13837 unsigned FMask = 0xFF << (8 * (3 - Step));
13839 unsigned FirstMask =
13840 (BPP.first.SrcOffset % 4) << (8 * (3 - Step)) | (ZeroMask & ~FMask);
13841 unsigned SecondMask =
13842 (BPP.second.SrcOffset % 4) << (8 * (3 - Step)) | (ZeroMask & ~FMask);
13846 int FirstGroup = -1;
13847 for (
int I = 0;
I < 2;
I++) {
13849 auto MatchesFirst = [&BPP](
DotSrc &IterElt) {
13850 return IterElt.SrcOp == *BPP.first.Src &&
13851 (IterElt.DWordOffset == (BPP.first.SrcOffset / 4));
13861 if (FirstGroup != -1) {
13863 auto MatchesSecond = [&BPP](
DotSrc &IterElt) {
13864 return IterElt.SrcOp == *BPP.second.Src &&
13865 (IterElt.DWordOffset == (BPP.second.SrcOffset / 4));
13871 Srcs.
push_back({*BPP.second.Src, SecondMask, BPP.second.SrcOffset / 4});
13879 unsigned ZeroMask = 0x0c0c0c0c;
13880 unsigned FMask = 0xFF << (8 * (3 - Step));
13884 ((Src0.
SrcOffset % 4) << (8 * (3 - Step)) | (ZeroMask & ~FMask)),
13888 ((Src1.
SrcOffset % 4) << (8 * (3 - Step)) | (ZeroMask & ~FMask)),
13899 if (Srcs.
size() == 1) {
13900 auto Elt = Srcs.
begin();
13904 if (Elt->PermMask == 0x3020100)
13911 auto FirstElt = Srcs.
begin();
13912 auto SecondElt = std::next(FirstElt);
13919 auto FirstMask = FirstElt->PermMask;
13920 auto SecondMask = SecondElt->PermMask;
13922 unsigned FirstCs = FirstMask & 0x0c0c0c0c;
13923 unsigned FirstPlusFour = FirstMask | 0x04040404;
13926 FirstMask = (FirstPlusFour & 0x0F0F0F0F) | FirstCs;
13938 FirstElt = std::next(SecondElt);
13939 if (FirstElt == Srcs.
end())
13942 SecondElt = std::next(FirstElt);
13945 if (SecondElt == Srcs.
end()) {
13951 DAG.
getConstant(FirstElt->PermMask, SL, MVT::i32)));
13957 return Perms.
size() == 2
13963 for (
auto &[EntryVal, EntryMask, EntryOffset] : Srcs) {
13964 EntryMask = EntryMask >> ((4 - ChainLength) * 8);
13965 auto ZeroMask = ChainLength == 2 ? 0x0c0c0000 : 0x0c000000;
13966 EntryMask += ZeroMask;
13971 auto Opcode =
Op.getOpcode();
13977static std::optional<bool>
13988 bool S0IsSigned = Known0.countMinLeadingOnes() > 0;
13991 bool S1IsSigned = Known1.countMinLeadingOnes() > 0;
13993 assert(!(S0IsUnsigned && S0IsSigned));
13994 assert(!(S1IsUnsigned && S1IsSigned));
14002 if ((S0IsUnsigned && S1IsUnsigned) || (S0IsSigned && S1IsSigned))
14008 if ((S0IsUnsigned && S1IsSigned) || (S0IsSigned && S1IsUnsigned))
14009 return std::nullopt;
14021 if ((S0IsSigned && !(S1IsSigned || S1IsUnsigned)) ||
14022 ((S1IsSigned && !(S0IsSigned || S0IsUnsigned))))
14027 if ((!(S1IsSigned || S1IsUnsigned) && !(S0IsSigned || S0IsUnsigned)))
14033 if ((S0IsUnsigned && !(S1IsSigned || S1IsUnsigned)) ||
14034 ((S1IsUnsigned && !(S0IsSigned || S0IsUnsigned))))
14035 return std::nullopt;
14041 DAGCombinerInfo &DCI)
const {
14043 EVT VT =
N->getValueType(0);
14050 if (
SDValue Folded = tryFoldToMad64_32(
N, DCI))
14055 if (
SDValue V = reassociateScalarOps(
N, DAG)) {
14062 std::optional<bool> IsSigned;
14068 int ChainLength = 0;
14069 for (
int I = 0;
I < 4;
I++) {
14070 auto MulIdx =
isMul(LHS) ? 0 :
isMul(RHS) ? 1 : -1;
14073 auto Src0 =
handleMulOperand(TempNode->getOperand(MulIdx)->getOperand(0));
14076 auto Src1 =
handleMulOperand(TempNode->getOperand(MulIdx)->getOperand(1));
14081 TempNode->getOperand(MulIdx), *Src0, *Src1,
14082 TempNode->getOperand(MulIdx)->getOperand(0),
14083 TempNode->getOperand(MulIdx)->getOperand(1), DAG);
14087 IsSigned = *IterIsSigned;
14088 if (*IterIsSigned != *IsSigned)
14091 auto AddIdx = 1 - MulIdx;
14094 if (
I == 2 &&
isMul(TempNode->getOperand(AddIdx))) {
14095 Src2s.
push_back(TempNode->getOperand(AddIdx));
14105 TempNode->getOperand(AddIdx), *Src0, *Src1,
14106 TempNode->getOperand(AddIdx)->getOperand(0),
14107 TempNode->getOperand(AddIdx)->getOperand(1), DAG);
14111 if (*IterIsSigned != *IsSigned)
14115 ChainLength =
I + 2;
14119 TempNode = TempNode->getOperand(AddIdx);
14121 ChainLength =
I + 1;
14122 if (TempNode->getNumOperands() < 2)
14124 LHS = TempNode->getOperand(0);
14125 RHS = TempNode->getOperand(1);
14128 if (ChainLength < 2)
14134 if (ChainLength < 4) {
14144 bool UseOriginalSrc =
false;
14145 if (ChainLength == 4 && Src0s.
size() == 1 && Src1s.
size() == 1 &&
14146 Src0s.
begin()->PermMask == Src1s.
begin()->PermMask &&
14147 Src0s.
begin()->SrcOp.getValueSizeInBits() >= 32 &&
14148 Src1s.
begin()->SrcOp.getValueSizeInBits() >= 32) {
14150 auto Src0Mask = Src0s.
begin()->PermMask;
14151 SrcBytes.
push_back(Src0Mask & 0xFF000000);
14152 bool UniqueEntries =
true;
14153 for (
auto I = 1;
I < 4;
I++) {
14154 auto NextByte = Src0Mask & (0xFF << ((3 -
I) * 8));
14157 UniqueEntries =
false;
14163 if (UniqueEntries) {
14164 UseOriginalSrc =
true;
14166 auto FirstElt = Src0s.
begin();
14170 auto SecondElt = Src1s.
begin();
14172 SecondElt->DWordOffset);
14181 if (!UseOriginalSrc) {
14188 DAG.
getExtOrTrunc(*IsSigned, Src2s[ChainLength - 1], SL, MVT::i32);
14191 : Intrinsic::amdgcn_udot4,
14201 if (VT != MVT::i32 || !DCI.isAfterLegalizeDAG())
14206 unsigned Opc = LHS.getOpcode();
14211 Opc = RHS.getOpcode();
14217 auto Cond = RHS.getOperand(0);
14225 return DAG.
getNode(Opc, SL, VTList, Args);
14239 DAGCombinerInfo &DCI)
const {
14241 EVT VT =
N->getValueType(0);
14243 if (VT != MVT::i32)
14252 unsigned Opc = RHS.getOpcode();
14258 auto Cond = RHS.getOperand(0);
14266 return DAG.
getNode(Opc, SL, VTList, Args);
14280SDValue SITargetLowering::performAddCarrySubCarryCombine(
SDNode *
N,
14281 DAGCombinerInfo &DCI)
const {
14283 if (
N->getValueType(0) != MVT::i32)
14295 unsigned Opc =
N->getOpcode();
14298 SDValue Args[] = { LHS.getOperand(0), LHS.getOperand(1),
N->getOperand(2) };
14305 DAGCombinerInfo &DCI)
const {
14310 EVT VT =
N->getValueType(0);
14322 if (
A == LHS.getOperand(1)) {
14323 unsigned FusedOp = getFusedOpcode(DAG,
N, LHS.getNode());
14324 if (FusedOp != 0) {
14326 return DAG.
getNode(FusedOp, SL, VT,
A, Two, RHS);
14334 if (
A == RHS.getOperand(1)) {
14335 unsigned FusedOp = getFusedOpcode(DAG,
N, RHS.getNode());
14336 if (FusedOp != 0) {
14338 return DAG.
getNode(FusedOp, SL, VT,
A, Two, LHS);
14347 DAGCombinerInfo &DCI)
const {
14353 EVT VT =
N->getValueType(0);
14366 if (
A == LHS.getOperand(1)) {
14367 unsigned FusedOp = getFusedOpcode(DAG,
N, LHS.getNode());
14372 return DAG.
getNode(FusedOp, SL, VT,
A, Two, NegRHS);
14381 if (
A == RHS.getOperand(1)) {
14382 unsigned FusedOp = getFusedOpcode(DAG,
N, RHS.getNode());
14385 return DAG.
getNode(FusedOp, SL, VT,
A, NegTwo, LHS);
14394 DAGCombinerInfo &DCI)
const {
14397 EVT VT =
N->getValueType(0);
14411 bool IsNegative =
false;
14412 if (CLHS->isExactlyValue(1.0) ||
14413 (IsNegative = CLHS->isExactlyValue(-1.0))) {
14416 if (RHS.getOpcode() == ISD::FSQRT) {
14420 return IsNegative ? DAG.
getNode(ISD::FNEG, SL, VT, Rsq, Flags) : Rsq;
14429 DAGCombinerInfo &DCI)
const {
14431 EVT VT =
N->getValueType(0);
14453 (
N->getFlags().hasAllowContract() &&
14454 FMA->getFlags().hasAllowContract())) {
14469 if (FMAOp1.
getOpcode() != ISD::FP_EXTEND ||
14488 if (Vec1 == Vec2 || Vec3 == Vec4)
14494 if ((Vec1 == Vec3 && Vec2 == Vec4) ||
14495 (Vec1 == Vec4 && Vec2 == Vec3)) {
14504 DAGCombinerInfo &DCI)
const {
14510 EVT VT = LHS.getValueType();
14547 LHS.getConstantOperandVal(1) != LHS.getConstantOperandVal(2) &&
14554 const APInt &CT = LHS.getConstantOperandAPInt(1);
14555 const APInt &CF = LHS.getConstantOperandAPInt(2);
14567 if (VT != MVT::f32 && VT != MVT::f64 &&
14600 DAGCombinerInfo &DCI)
const {
14622 unsigned ShiftOffset = 8 *
Offset;
14624 ShiftOffset -=
C->getZExtValue();
14626 ShiftOffset +=
C->getZExtValue();
14628 if (ShiftOffset < 32 && (ShiftOffset % 8) == 0) {
14630 MVT::f32, Shifted);
14641 DCI.AddToWorklist(
N);
14648 return DAG.
getNode(
N->getOpcode(), SL, MVT::f32, DemandedSrc);
14654 DAGCombinerInfo &DCI)
const {
14664 return DCI.DAG.getConstantFP(Zero,
SDLoc(
N),
N->getValueType(0));
14667 APFloat One(
F.getSemantics(),
"1.0");
14669 return DCI.DAG.getConstantFP(One,
SDLoc(
N),
N->getValueType(0));
14679 switch (
N->getOpcode()) {
14681 return performAddCombine(
N, DCI);
14683 return performSubCombine(
N, DCI);
14686 return performAddCarrySubCarryCombine(
N, DCI);
14688 return performFAddCombine(
N, DCI);
14690 return performFSubCombine(
N, DCI);
14692 return performFDivCombine(
N, DCI);
14694 return performSetCCCombine(
N, DCI);
14697 case ISD::FMAXNUM_IEEE:
14698 case ISD::FMINNUM_IEEE:
14699 case ISD::FMAXIMUM:
14700 case ISD::FMINIMUM:
14707 return performMinMaxCombine(
N, DCI);
14709 return performFMACombine(
N, DCI);
14711 return performAndCombine(
N, DCI);
14713 return performOrCombine(
N, DCI);
14716 if (
N->getValueType(0) == MVT::i32 &&
N->isDivergent() &&
14717 TII->pseudoToMCOpcode(AMDGPU::V_PERM_B32_e64) != -1) {
14723 return performXorCombine(
N, DCI);
14725 return performZeroExtendCombine(
N, DCI);
14727 return performSignExtendInRegCombine(
N , DCI);
14729 return performClassCombine(
N, DCI);
14731 return performFCanonicalizeCombine(
N, DCI);
14733 return performRcpCombine(
N, DCI);
14748 return performUCharToFloatCombine(
N, DCI);
14750 return performFCopySignCombine(
N, DCI);
14755 return performCvtF32UByteNCombine(
N, DCI);
14757 return performFMed3Combine(
N, DCI);
14759 return performCvtPkRTZCombine(
N, DCI);
14761 return performClampCombine(
N, DCI);
14764 EVT VT =
N->getValueType(0);
14767 if (VT == MVT::v2i16 || VT == MVT::v2f16 || VT == MVT::v2bf16) {
14770 EVT EltVT = Src.getValueType();
14771 if (EltVT != MVT::i16)
14772 Src = DAG.
getNode(ISD::BITCAST, SL, MVT::i16, Src);
14775 return DAG.
getNode(ISD::BITCAST, SL, VT, Ext);
14781 return performExtractVectorEltCombine(
N, DCI);
14783 return performInsertVectorEltCombine(
N, DCI);
14785 return performFPRoundCombine(
N, DCI);
14794 return performMemSDNodeCombine(MemNode, DCI);
14807 default:
return ~0u;
14808 case AMDGPU::sub0:
return 0;
14809 case AMDGPU::sub1:
return 1;
14810 case AMDGPU::sub2:
return 2;
14811 case AMDGPU::sub3:
return 3;
14812 case AMDGPU::sub4:
return 4;
14819 unsigned Opcode =
Node->getMachineOpcode();
14823 if (D16Idx >= 0 &&
Node->getConstantOperandVal(D16Idx))
14829 unsigned OldDmask =
Node->getConstantOperandVal(DmaskIdx);
14830 unsigned NewDmask = 0;
14833 bool UsesTFC = ((int(TFEIdx) >= 0 &&
Node->getConstantOperandVal(TFEIdx)) ||
14834 (int(LWEIdx) >= 0 &&
Node->getConstantOperandVal(LWEIdx)))
14837 unsigned TFCLane = 0;
14838 bool HasChain =
Node->getNumValues() > 1;
14840 if (OldDmask == 0) {
14848 TFCLane = OldBitsSet;
14856 if (
I.getUse().getResNo() != 0)
14860 if (!
I->isMachineOpcode() ||
14861 I->getMachineOpcode() != TargetOpcode::EXTRACT_SUBREG)
14873 if (UsesTFC && Lane == TFCLane) {
14878 for (
unsigned i = 0, Dmask = OldDmask; (i <= Lane) && (Dmask != 0); i++) {
14880 Dmask &= ~(1 << Comp);
14888 NewDmask |= 1 << Comp;
14893 bool NoChannels = !NewDmask;
14900 if (OldBitsSet == 1)
14906 if (NewDmask == OldDmask)
14915 unsigned NewChannels = BitsSet + UsesTFC;
14919 assert(NewOpcode != -1 &&
14920 NewOpcode !=
static_cast<int>(
Node->getMachineOpcode()) &&
14921 "failed to find equivalent MIMG op");
14929 MVT SVT =
Node->getValueType(0).getVectorElementType().getSimpleVT();
14931 MVT ResultVT = NewChannels == 1 ?
14933 NewChannels == 5 ? 8 : NewChannels);
14947 if (NewChannels == 1) {
14957 for (
unsigned i = 0, Idx = AMDGPU::sub0; i < 5; ++i) {
14962 if (i || !NoChannels)
14967 if (NewUser !=
User) {
14975 case AMDGPU::sub0: Idx = AMDGPU::sub1;
break;
14976 case AMDGPU::sub1: Idx = AMDGPU::sub2;
break;
14977 case AMDGPU::sub2: Idx = AMDGPU::sub3;
break;
14978 case AMDGPU::sub3: Idx = AMDGPU::sub4;
break;
14988 Op =
Op.getOperand(0);
15008 MRI.createVirtualRegister(&AMDGPU::VReg_1RegClass), MVT::i1);
15012 = DAG.
getCopyToReg(Node->getOperand(0), SL, VReg, SrcVal,
15019 return ToResultReg.
getNode();
15024 for (
unsigned i = 0; i < Node->getNumOperands(); ++i) {
15032 Node->getOperand(i).getValueType(),
15033 Node->getOperand(i)), 0));
15044 unsigned Opcode = Node->getMachineOpcode();
15046 if (
TII->isImage(Opcode) && !
TII->get(Opcode).mayStore() &&
15047 !
TII->isGather4(Opcode) &&
15049 return adjustWritemask(Node, DAG);
15052 if (Opcode == AMDGPU::INSERT_SUBREG ||
15053 Opcode == AMDGPU::REG_SEQUENCE) {
15059 case AMDGPU::V_DIV_SCALE_F32_e64:
15060 case AMDGPU::V_DIV_SCALE_F64_e64: {
15070 (Src0 == Src1 || Src0 == Src2))
15127 unsigned InitIdx = 0;
15129 if (
TII->isImage(
MI)) {
15137 unsigned TFEVal = TFE ? TFE->
getImm() : 0;
15138 unsigned LWEVal = LWE ? LWE->
getImm() : 0;
15139 unsigned D16Val = D16 ? D16->getImm() : 0;
15141 if (!TFEVal && !LWEVal)
15152 assert(MO_Dmask &&
"Expected dmask operand in instruction");
15154 unsigned dmask = MO_Dmask->
getImm();
15161 InitIdx = D16Val && Packed ? ((ActiveLanes + 1) >> 1) + 1 : ActiveLanes + 1;
15167 TRI.getRegSizeInBits(*
TII->getOpRegClass(
MI, DstIdx)) / 32;
15168 if (DstSize < InitIdx)
15171 InitIdx =
TRI.getRegSizeInBits(*
TII->getOpRegClass(
MI, DstIdx)) / 32;
15179 Register PrevDst =
MRI.cloneVirtualRegister(
MI.getOperand(DstIdx).getReg());
15180 unsigned NewDst = 0;
15189 for (; SizeLeft; SizeLeft--, CurrIdx++) {
15190 NewDst =
MRI.createVirtualRegister(
TII->getOpRegClass(
MI, DstIdx));
15208 MI.tieOperands(DstIdx,
MI.getNumOperands() - 1);
15221 if (
TII->isVOP3(
MI.getOpcode())) {
15223 TII->legalizeOperandsVOP3(
MRI,
MI);
15228 if (!
MI.getDesc().operands().empty()) {
15229 unsigned Opc =
MI.getOpcode();
15230 bool HasAGPRs = Info->mayNeedAGPRs();
15238 if ((
I == Src2Idx) && (HasAGPRs))
15241 if (!
Op.isReg() || !
Op.getReg().isVirtual())
15243 auto *RC =
TRI->getRegClassForReg(
MRI,
Op.getReg());
15244 if (!
TRI->hasAGPRs(RC))
15246 auto *Src =
MRI.getUniqueVRegDef(
Op.getReg());
15247 if (!Src || !Src->isCopy() ||
15248 !
TRI->isSGPRReg(
MRI, Src->getOperand(1).getReg()))
15250 auto *NewRC =
TRI->getEquivalentVGPRClass(RC);
15254 MRI.setRegClass(
Op.getReg(), NewRC);
15261 if (
auto *Src2 =
TII->getNamedOperand(
MI, AMDGPU::OpName::src2)) {
15262 if (Src2->isReg() && Src2->getReg().isVirtual()) {
15263 auto *RC =
TRI->getRegClassForReg(
MRI, Src2->getReg());
15264 if (
TRI->isVectorSuperClass(RC)) {
15265 auto *NewRC =
TRI->getEquivalentAGPRClass(RC);
15266 MRI.setRegClass(Src2->getReg(), NewRC);
15267 if (Src2->isTied())
15268 MRI.setRegClass(
MI.getOperand(0).getReg(), NewRC);
15277 if (
TII->isImage(
MI))
15278 TII->enforceOperandRCAlignment(
MI, AMDGPU::OpName::vaddr);
15304 MVT::v2i32, Ops0), 0);
15334 RsrcDword2And3 & UINT64_C(0xFFFFFFFF));
15356std::pair<unsigned, const TargetRegisterClass *>
15363 if (Constraint.
size() == 1) {
15365 switch (Constraint[0]) {
15372 RC = &AMDGPU::SReg_32RegClass;
15375 RC = &AMDGPU::SGPR_64RegClass;
15380 return std::pair(0U,
nullptr);
15387 RC = &AMDGPU::VGPR_32RegClass;
15392 return std::pair(0U,
nullptr);
15401 RC = &AMDGPU::AGPR_32RegClass;
15406 return std::pair(0U,
nullptr);
15415 return std::pair(0U, RC);
15420 if (
RegName.consume_front(
"v")) {
15421 RC = &AMDGPU::VGPR_32RegClass;
15422 }
else if (
RegName.consume_front(
"s")) {
15423 RC = &AMDGPU::SGPR_32RegClass;
15424 }
else if (
RegName.consume_front(
"a")) {
15425 RC = &AMDGPU::AGPR_32RegClass;
15430 if (
RegName.consume_front(
"[")) {
15440 RC =
TRI->getVGPRClassForBitWidth(Width);
15442 RC =
TRI->getSGPRClassForBitWidth(Width);
15444 RC =
TRI->getAGPRClassForBitWidth(Width);
15446 Reg =
TRI->getMatchingSuperReg(Reg, AMDGPU::sub0, RC);
15447 return std::pair(Reg, RC);
15452 if (!
Failed && Idx < RC->getNumRegs())
15460 Ret.second =
TRI->getPhysRegBaseClass(Ret.first);
15466 if (Constraint.
size() == 1) {
15467 switch (Constraint[0]) {
15476 }
else if (Constraint ==
"DA" ||
15477 Constraint ==
"DB") {
15485 if (Constraint.
size() == 1) {
15486 switch (Constraint[0]) {
15509 std::vector<SDValue> &Ops,
15524 unsigned Size =
Op.getScalarValueSizeInBits();
15532 Val =
C->getSExtValue();
15536 Val =
C->getValueAPF().bitcastToAPInt().getSExtValue();
15542 if (
Op.getOperand(0).isUndef() ||
Op.getOperand(1).isUndef())
15545 Val =
C->getSExtValue();
15549 Val =
C->getValueAPF().bitcastToAPInt().getSExtValue();
15559 if (Constraint.
size() == 1) {
15560 switch (Constraint[0]) {
15575 }
else if (Constraint.
size() == 2) {
15576 if (Constraint ==
"DA") {
15577 int64_t HiBits =
static_cast<int32_t
>(Val >> 32);
15578 int64_t LoBits =
static_cast<int32_t
>(Val);
15582 if (Constraint ==
"DB") {
15590 unsigned MaxSize)
const {
15591 unsigned Size = std::min<unsigned>(
Op.getScalarValueSizeInBits(), MaxSize);
15594 MVT VT =
Op.getSimpleValueType();
15619 switch (UnalignedClassID) {
15620 case AMDGPU::VReg_64RegClassID:
15621 return AMDGPU::VReg_64_Align2RegClassID;
15622 case AMDGPU::VReg_96RegClassID:
15623 return AMDGPU::VReg_96_Align2RegClassID;
15624 case AMDGPU::VReg_128RegClassID:
15625 return AMDGPU::VReg_128_Align2RegClassID;
15626 case AMDGPU::VReg_160RegClassID:
15627 return AMDGPU::VReg_160_Align2RegClassID;
15628 case AMDGPU::VReg_192RegClassID:
15629 return AMDGPU::VReg_192_Align2RegClassID;
15630 case AMDGPU::VReg_224RegClassID:
15631 return AMDGPU::VReg_224_Align2RegClassID;
15632 case AMDGPU::VReg_256RegClassID:
15633 return AMDGPU::VReg_256_Align2RegClassID;
15634 case AMDGPU::VReg_288RegClassID:
15635 return AMDGPU::VReg_288_Align2RegClassID;
15636 case AMDGPU::VReg_320RegClassID:
15637 return AMDGPU::VReg_320_Align2RegClassID;
15638 case AMDGPU::VReg_352RegClassID:
15639 return AMDGPU::VReg_352_Align2RegClassID;
15640 case AMDGPU::VReg_384RegClassID:
15641 return AMDGPU::VReg_384_Align2RegClassID;
15642 case AMDGPU::VReg_512RegClassID:
15643 return AMDGPU::VReg_512_Align2RegClassID;
15644 case AMDGPU::VReg_1024RegClassID:
15645 return AMDGPU::VReg_1024_Align2RegClassID;
15646 case AMDGPU::AReg_64RegClassID:
15647 return AMDGPU::AReg_64_Align2RegClassID;
15648 case AMDGPU::AReg_96RegClassID:
15649 return AMDGPU::AReg_96_Align2RegClassID;
15650 case AMDGPU::AReg_128RegClassID:
15651 return AMDGPU::AReg_128_Align2RegClassID;
15652 case AMDGPU::AReg_160RegClassID:
15653 return AMDGPU::AReg_160_Align2RegClassID;
15654 case AMDGPU::AReg_192RegClassID:
15655 return AMDGPU::AReg_192_Align2RegClassID;
15656 case AMDGPU::AReg_256RegClassID:
15657 return AMDGPU::AReg_256_Align2RegClassID;
15658 case AMDGPU::AReg_512RegClassID:
15659 return AMDGPU::AReg_512_Align2RegClassID;
15660 case AMDGPU::AReg_1024RegClassID:
15661 return AMDGPU::AReg_1024_Align2RegClassID;
15677 if (Info->isEntryFunction()) {
15684 unsigned MaxNumSGPRs = ST.getMaxNumSGPRs(MF);
15686 ? AMDGPU::SGPR_32RegClass.getRegister(MaxNumSGPRs - 1)
15687 :
TRI->getAlignedHighSGPRForRC(MF, 2,
15688 &AMDGPU::SGPR_64RegClass);
15689 Info->setSGPRForEXECCopy(SReg);
15691 assert(!
TRI->isSubRegister(Info->getScratchRSrcReg(),
15692 Info->getStackPtrOffsetReg()));
15693 if (Info->getStackPtrOffsetReg() != AMDGPU::SP_REG)
15694 MRI.replaceRegWith(AMDGPU::SP_REG, Info->getStackPtrOffsetReg());
15698 if (Info->getScratchRSrcReg() != AMDGPU::PRIVATE_RSRC_REG)
15699 MRI.replaceRegWith(AMDGPU::PRIVATE_RSRC_REG, Info->getScratchRSrcReg());
15701 if (Info->getFrameOffsetReg() != AMDGPU::FP_REG)
15702 MRI.replaceRegWith(AMDGPU::FP_REG, Info->getFrameOffsetReg());
15704 Info->limitOccupancy(MF);
15706 if (ST.isWave32() && !MF.
empty()) {
15707 for (
auto &
MBB : MF) {
15708 for (
auto &
MI :
MBB) {
15709 TII->fixImplicitOperands(
MI);
15719 if (ST.needsAlignedVGPRs()) {
15720 for (
unsigned I = 0, E =
MRI.getNumVirtRegs();
I != E; ++
I) {
15726 if (NewClassID != -1)
15727 MRI.setRegClass(Reg,
TRI->getRegClass(NewClassID));
15736 const APInt &DemandedElts,
15738 unsigned Depth)
const {
15740 unsigned Opc =
Op.getOpcode();
15743 unsigned IID =
Op.getConstantOperandVal(0);
15745 case Intrinsic::amdgcn_mbcnt_lo:
15746 case Intrinsic::amdgcn_mbcnt_hi: {
15753 unsigned MaxActiveBits = std::max(Src1ValBits, ST.getWavefrontSizeLog2());
15755 MaxActiveBits += Src1ValBits ? 1 : 0;
15756 unsigned Size =
Op.getValueType().getSizeInBits();
15757 if (MaxActiveBits <
Size)
15766 Op, Known, DemandedElts, DAG,
Depth);
15781 unsigned MaxValue =
15790 switch (
MI->getOpcode()) {
15791 case AMDGPU::G_INTRINSIC:
15792 case AMDGPU::G_INTRINSIC_CONVERGENT: {
15794 case Intrinsic::amdgcn_workitem_id_x:
15797 case Intrinsic::amdgcn_workitem_id_y:
15800 case Intrinsic::amdgcn_workitem_id_z:
15803 case Intrinsic::amdgcn_mbcnt_lo:
15804 case Intrinsic::amdgcn_mbcnt_hi: {
15806 unsigned Size =
MRI.getType(R).getSizeInBits();
15810 case Intrinsic::amdgcn_groupstaticsize: {
15821 case AMDGPU::G_AMDGPU_BUFFER_LOAD_UBYTE:
15824 case AMDGPU::G_AMDGPU_BUFFER_LOAD_USHORT:
15827 case AMDGPU::G_AMDGPU_SMED3:
15828 case AMDGPU::G_AMDGPU_UMED3: {
15829 auto [Dst, Src0, Src1, Src2] =
MI->getFirst4Regs();
15856 unsigned Depth)
const {
15864 if (
MaybeAlign RetAlign = Attrs.getRetAlignment())
15891 if (Header->getAlignment() != PrefAlign)
15894 unsigned LoopSize = 0;
15902 LoopSize +=
TII->getInstSizeInBytes(
MI);
15903 if (LoopSize > 192)
15908 if (LoopSize <= 64)
15911 if (LoopSize <= 128)
15912 return CacheLineAlign;
15918 auto I = Exit->getFirstNonDebugInstr();
15919 if (
I != Exit->end() &&
I->getOpcode() == AMDGPU::S_INST_PREFETCH)
15920 return CacheLineAlign;
15929 if (PreTerm == Pre->
begin() ||
15930 std::prev(PreTerm)->getOpcode() != AMDGPU::S_INST_PREFETCH)
15934 auto ExitHead = Exit->getFirstNonDebugInstr();
15935 if (ExitHead == Exit->end() ||
15936 ExitHead->getOpcode() != AMDGPU::S_INST_PREFETCH)
15941 return CacheLineAlign;
15949 N =
N->getOperand(0).getNode();
15950 if (
N->getOpcode() == ISD::INLINEASM ||
15951 N->getOpcode() == ISD::INLINEASM_BR)
15960 switch (
N->getOpcode()) {
15968 if (Reg.isPhysical() ||
MRI.isLiveIn(Reg))
15969 return !
TRI->isSGPRReg(
MRI, Reg);
15975 return !
TRI->isSGPRReg(
MRI, Reg);
15979 unsigned AS = L->getAddressSpace();
15983 case ISD::CALLSEQ_END:
16012 return A->readMem() &&
A->writeMem();
16033 switch (Ty.getScalarSizeInBits()) {
16047 unsigned Depth)
const {
16052 if (Info->getMode().DX10Clamp)
16065static bool fpModeMatchesGlobalFPAtomicMode(
const AtomicRMWInst *RMW) {
16067 auto DenormMode = RMW->
getParent()->getParent()->getDenormalMode(
Flt);
16079 return F->getFnAttribute(
"amdgpu-unsafe-fp-atomics").getValueAsString() !=
16086 Ctx.getSyncScopeNames(SSNs);
16092 <<
"Hardware instruction generated for atomic "
16094 <<
" operation at memory scope " << MemScope;
16099 Type *EltTy = VT->getElementType();
16100 return VT->getNumElements() == 2 &&
16132 bool HasSystemScope =
16164 if (Ty->isFloatTy()) {
16169 if (Ty->isDoubleTy()) {
16185 if (Subtarget->
hasGFX940Insts() && (Ty->isFloatTy() || Ty->isDoubleTy()))
16219 if (HasSystemScope)
16227 if (Ty->isFloatTy()) {
16274 if (HasSystemScope)
16309 if (HasSystemScope)
16346 if (RC == &AMDGPU::VReg_1RegClass && !isDivergent)
16348 : &AMDGPU::SReg_32RegClass;
16349 if (!
TRI->isSGPRClass(RC) && !isDivergent)
16350 return TRI->getEquivalentSGPRClass(RC);
16351 if (
TRI->isSGPRClass(RC) && isDivergent)
16352 return TRI->getEquivalentVGPRClass(RC);
16364 unsigned WaveSize) {
16369 if (!
IT ||
IT->getBitWidth() != WaveSize)
16374 if (!Visited.
insert(V).second)
16376 bool Result =
false;
16377 for (
const auto *U : V->users()) {
16379 if (V == U->getOperand(1)) {
16380 switch (Intrinsic->getIntrinsicID()) {
16384 case Intrinsic::amdgcn_if_break:
16385 case Intrinsic::amdgcn_if:
16386 case Intrinsic::amdgcn_else:
16391 if (V == U->getOperand(0)) {
16392 switch (Intrinsic->getIntrinsicID()) {
16396 case Intrinsic::amdgcn_end_cf:
16397 case Intrinsic::amdgcn_loop:
16403 Result =
hasCFUser(U, Visited, WaveSize);
16412 const Value *V)
const {
16414 if (CI->isInlineAsm()) {
16423 for (
auto &TC : TargetConstraints) {
16427 SIRI, TC.ConstraintCode, TC.ConstraintVT).second;
16440 for (;
I !=
E; ++
I) {
16464 return MRI.hasOneNonDBGUse(N0);
16471 if (
I.getMetadata(
"amdgpu.noclobber"))
16473 if (
I.getMetadata(
"amdgpu.last.use"))
16483 if (!Def->isMachineOpcode())
16493 if (
II.isCompare() &&
II.hasImplicitDefOfPhysReg(AMDGPU::SCC)) {
16494 PhysReg = AMDGPU::SCC;
16496 TRI->getMinimalPhysRegClass(PhysReg, Def->getSimpleValueType(ResNo));
16510 "this cannot be replaced with add");
16516 "target should have atomic fadd instructions");
16519 "generic atomicrmw expansion only supports FP32 operand in flat "
16593 for (
auto &
P : MDs)
16598 std::prev(BB->
end())->eraseFromParent();
16599 Builder.SetInsertPoint(BB);
16600 Builder.CreateBr(CheckSharedBB);
16602 Builder.SetInsertPoint(CheckSharedBB);
16603 CallInst *IsShared = Builder.CreateIntrinsic(Intrinsic::amdgcn_is_shared, {},
16604 {
Addr},
nullptr,
"is.shared");
16605 Builder.CreateCondBr(IsShared, SharedBB, CheckPrivateBB);
16607 Builder.SetInsertPoint(SharedBB);
16608 Value *CastToLocal = Builder.CreateAddrSpaceCast(
16610 Value *LoadedShared = CreateNewAtomicRMW(Builder, CastToLocal, Val);
16611 Builder.CreateBr(PhiBB);
16613 Builder.SetInsertPoint(CheckPrivateBB);
16614 CallInst *IsPrivate = Builder.CreateIntrinsic(
16615 Intrinsic::amdgcn_is_private, {}, {
Addr},
nullptr,
"is.private");
16616 Builder.CreateCondBr(IsPrivate, PrivateBB, GlobalBB);
16618 Builder.SetInsertPoint(PrivateBB);
16619 Value *CastToPrivate = Builder.CreateAddrSpaceCast(
16621 Value *LoadedPrivate =
16622 Builder.CreateLoad(ValTy, CastToPrivate,
"loaded.private");
16623 Value *NewVal = Builder.CreateFAdd(LoadedPrivate, Val,
"val.new");
16624 Builder.CreateStore(NewVal, CastToPrivate);
16625 Builder.CreateBr(PhiBB);
16627 Builder.SetInsertPoint(GlobalBB);
16628 Value *CastToGlobal = Builder.CreateAddrSpaceCast(
16630 Value *LoadedGlobal = CreateNewAtomicRMW(Builder, CastToGlobal, Val);
16631 Builder.CreateBr(PhiBB);
16633 Builder.SetInsertPoint(PhiBB);
16634 PHINode *Loaded = Builder.CreatePHI(ValTy, 3,
"loaded.phi");
16638 Builder.CreateBr(ExitBB);
16656 LoadInst *LI = Builder.CreateAlignedLoad(
static bool isMul(MachineInstr *MI)
unsigned const MachineRegisterInfo * MRI
static unsigned getIntrinsicID(const SDNode *N)
static bool canGuaranteeTCO(CallingConv::ID CC, bool GuaranteeTailCalls)
Return true if the calling convention is one that we can guarantee TCO for.
static bool mayTailCallThisCC(CallingConv::ID CC)
Return true if we might ever do TCO for calls with this calling convention.
static constexpr std::pair< ImplicitArgumentMask, StringLiteral > ImplicitAttrs[]
Contains the definition of a TargetInstrInfo class that is common to all AMD GPUs.
static bool parseTexFail(uint64_t TexFailCtrl, bool &TFE, bool &LWE, bool &IsTexFail)
static void packImage16bitOpsToDwords(MachineIRBuilder &B, MachineInstr &MI, SmallVectorImpl< Register > &PackedAddrs, unsigned ArgOffset, const AMDGPU::ImageDimIntrinsicInfo *Intr, bool IsA16, bool IsG16)
Turn a set of s16 typed registers in AddrRegs into a dword sized vector with s16 typed elements.
static bool isKnownNonNull(Register Val, MachineRegisterInfo &MRI, const AMDGPUTargetMachine &TM, unsigned AddrSpace)
Return true if the value is a known valid address, such that a null check is not necessary.
Provides AMDGPU specific target descriptions.
The AMDGPU TargetMachine interface definition for hw codegen targets.
This file implements a class to represent arbitrary precision integral constant values and operations...
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
static cl::opt< ITMode > IT(cl::desc("IT block support"), cl::Hidden, cl::init(DefaultIT), cl::values(clEnumValN(DefaultIT, "arm-default-it", "Generate any type of IT block"), clEnumValN(RestrictedIT, "arm-restrict-it", "Disallow complex IT blocks")))
Function Alias Analysis Results
static const Function * getParent(const Value *V)
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
#define LLVM_ATTRIBUTE_UNUSED
static std::optional< SDByteProvider > calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth, std::optional< uint64_t > VectorIndex, unsigned StartingIndex=0)
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
static bool isSigned(unsigned int Opcode)
Utilities for dealing with flags related to floating point properties and mode controls.
AMD GCN specific subclass of TargetSubtarget.
Provides analysis for querying information about KnownBits during GISel passes.
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic oper...
const HexagonInstrInfo * TII
static bool isUndef(ArrayRef< int > Mask)
iv Induction Variable Users
static const unsigned MaxDepth
Contains matchers for matching SSA Machine Instructions.
mir Rename Register Operands
unsigned const TargetRegisterInfo * TRI
Promote Memory to Register
uint64_t IntrinsicInst * II
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
const char LLVMTargetMachineRef TM
const SmallVectorImpl< MachineOperand > & Cond
static void r0(uint32_t &A, uint32_t &B, uint32_t &C, uint32_t &D, uint32_t &E, int I, uint32_t *Buf)
static void r3(uint32_t &A, uint32_t &B, uint32_t &C, uint32_t &D, uint32_t &E, int I, uint32_t *Buf)
static void r2(uint32_t &A, uint32_t &B, uint32_t &C, uint32_t &D, uint32_t &E, int I, uint32_t *Buf)
static void r1(uint32_t &A, uint32_t &B, uint32_t &C, uint32_t &D, uint32_t &E, int I, uint32_t *Buf)
#define FP_DENORM_FLUSH_NONE
#define FP_DENORM_FLUSH_IN_FLUSH_OUT
static void reservePrivateMemoryRegs(const TargetMachine &TM, MachineFunction &MF, const SIRegisterInfo &TRI, SIMachineFunctionInfo &Info)
static SDValue adjustLoadValueTypeImpl(SDValue Result, EVT LoadVT, const SDLoc &DL, SelectionDAG &DAG, bool Unpacked)
static MachineBasicBlock * emitIndirectSrc(MachineInstr &MI, MachineBasicBlock &MBB, const GCNSubtarget &ST)
static bool denormalModeIsFlushAllF64F16(const MachineFunction &MF)
static std::pair< unsigned, int > computeIndirectRegAndOffset(const SIRegisterInfo &TRI, const TargetRegisterClass *SuperRC, unsigned VecReg, int Offset)
static bool denormalModeIsFlushAllF32(const MachineFunction &MF)
static bool addresses16Bits(int Mask)
static bool isClampZeroToOne(SDValue A, SDValue B)
static bool supportsMin3Max3(const GCNSubtarget &Subtarget, unsigned Opc, EVT VT)
static unsigned findFirstFreeSGPR(CCState &CCInfo)
static uint32_t getPermuteMask(SDValue V)
static SDValue lowerLaneOp(const SITargetLowering &TLI, SDNode *N, SelectionDAG &DAG)
static int getAlignedAGPRClassID(unsigned UnalignedClassID)
static void processPSInputArgs(SmallVectorImpl< ISD::InputArg > &Splits, CallingConv::ID CallConv, ArrayRef< ISD::InputArg > Ins, BitVector &Skipped, FunctionType *FType, SIMachineFunctionInfo *Info)
static SDValue selectSOffset(SDValue SOffset, SelectionDAG &DAG, const GCNSubtarget *Subtarget)
static SDValue getLoadExtOrTrunc(SelectionDAG &DAG, ISD::LoadExtType ExtType, SDValue Op, const SDLoc &SL, EVT VT)
static void fixMasks(SmallVectorImpl< DotSrc > &Srcs, unsigned ChainLength)
static SDValue strictFPExtFromF16(SelectionDAG &DAG, SDValue Src)
Return the source of an fp_extend from f16 to f32, or a converted FP constant.
static bool bitOpWithConstantIsReducible(unsigned Opc, uint32_t Val)
static cl::opt< bool > DisableLoopAlignment("amdgpu-disable-loop-alignment", cl::desc("Do not align and prefetch loops"), cl::init(false))
static SDValue getDWordFromOffset(SelectionDAG &DAG, SDLoc SL, SDValue Src, unsigned DWordOffset)
static MachineBasicBlock::iterator loadM0FromVGPR(const SIInstrInfo *TII, MachineBasicBlock &MBB, MachineInstr &MI, unsigned InitResultReg, unsigned PhiReg, int Offset, bool UseGPRIdxMode, Register &SGPRIdxReg)
static bool isImmConstraint(StringRef Constraint)
static SDValue padEltsToUndef(SelectionDAG &DAG, const SDLoc &DL, EVT CastVT, SDValue Src, int ExtraElts)
static SDValue lowerICMPIntrinsic(const SITargetLowering &TLI, SDNode *N, SelectionDAG &DAG)
static bool hasCFUser(const Value *V, SmallPtrSet< const Value *, 16 > &Visited, unsigned WaveSize)
static OptimizationRemark emitAtomicRMWLegalRemark(const AtomicRMWInst *RMW)
static unsigned SubIdx2Lane(unsigned Idx)
Helper function for adjustWritemask.
static bool addressMayBeAccessedAsPrivate(const MachineMemOperand *MMO, const SIMachineFunctionInfo &Info)
static MachineBasicBlock * lowerWaveReduce(MachineInstr &MI, MachineBasicBlock &BB, const GCNSubtarget &ST, unsigned Opc)
static bool elementPairIsContiguous(ArrayRef< int > Mask, int Elt)
static bool isHalf2OrBFloat2(Type *Ty)
static ArgDescriptor allocateSGPR32InputImpl(CCState &CCInfo, const TargetRegisterClass *RC, unsigned NumArgRegs)
static SDValue getMad64_32(SelectionDAG &DAG, const SDLoc &SL, EVT VT, SDValue N0, SDValue N1, SDValue N2, bool Signed)
static SDValue resolveSources(SelectionDAG &DAG, SDLoc SL, SmallVectorImpl< DotSrc > &Srcs, bool IsSigned, bool IsAny)
static bool hasNon16BitAccesses(uint64_t PermMask, SDValue &Op, SDValue &OtherOp)
static void placeSources(ByteProvider< SDValue > &Src0, ByteProvider< SDValue > &Src1, SmallVectorImpl< DotSrc > &Src0s, SmallVectorImpl< DotSrc > &Src1s, int Step)
static bool isHalf2(Type *Ty)
bool unsafeFPAtomicsDisabled(Function *F)
static EVT memVTFromLoadIntrReturn(const SITargetLowering &TLI, const DataLayout &DL, Type *Ty, unsigned MaxNumLanes)
static MachineBasicBlock::iterator emitLoadM0FromVGPRLoop(const SIInstrInfo *TII, MachineRegisterInfo &MRI, MachineBasicBlock &OrigBB, MachineBasicBlock &LoopBB, const DebugLoc &DL, const MachineOperand &Idx, unsigned InitReg, unsigned ResultReg, unsigned PhiReg, unsigned InitSaveExecReg, int Offset, bool UseGPRIdxMode, Register &SGPRIdxReg)
static SDValue matchPERM(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static bool isFrameIndexOp(SDValue Op)
static ConstantFPSDNode * getSplatConstantFP(SDValue Op)
static void allocateSGPR32Input(CCState &CCInfo, ArgDescriptor &Arg)
static bool isExtendedFrom16Bits(SDValue &Operand)
static std::optional< bool > checkDot4MulSignedness(const SDValue &N, ByteProvider< SDValue > &Src0, ByteProvider< SDValue > &Src1, const SDValue &S0Op, const SDValue &S1Op, const SelectionDAG &DAG)
static bool vectorEltWillFoldAway(SDValue Op)
static SDValue getSPDenormModeValue(uint32_t SPDenormMode, SelectionDAG &DAG, const SIMachineFunctionInfo *Info, const GCNSubtarget *ST)
static uint32_t getConstantPermuteMask(uint32_t C)
static MachineBasicBlock * emitIndirectDst(MachineInstr &MI, MachineBasicBlock &MBB, const GCNSubtarget &ST)
static bool isBFloat2(Type *Ty)
static void setM0ToIndexFromSGPR(const SIInstrInfo *TII, MachineRegisterInfo &MRI, MachineInstr &MI, int Offset)
static ArgDescriptor allocateVGPR32Input(CCState &CCInfo, unsigned Mask=~0u, ArgDescriptor Arg=ArgDescriptor())
static std::pair< MachineBasicBlock *, MachineBasicBlock * > splitBlockForLoop(MachineInstr &MI, MachineBasicBlock &MBB, bool InstInLoop)
static unsigned getBasePtrIndex(const MemSDNode *N)
MemSDNode::getBasePtr() does not work for intrinsics, which needs to offset by the chain and intrinsi...
static void knownBitsForWorkitemID(const GCNSubtarget &ST, GISelKnownBits &KB, KnownBits &Known, unsigned Dim)
static LLVM_ATTRIBUTE_UNUSED bool isCopyFromRegOfInlineAsm(const SDNode *N)
static void allocateFixedSGPRInputImpl(CCState &CCInfo, const TargetRegisterClass *RC, MCRegister Reg)
static SDValue constructRetValue(SelectionDAG &DAG, MachineSDNode *Result, ArrayRef< EVT > ResultTypes, bool IsTexFail, bool Unpacked, bool IsD16, int DMaskPop, int NumVDataDwords, bool IsAtomicPacked16Bit, const SDLoc &DL)
static std::optional< ByteProvider< SDValue > > handleMulOperand(const SDValue &MulOperand)
static SDValue lowerFCMPIntrinsic(const SITargetLowering &TLI, SDNode *N, SelectionDAG &DAG)
static Register getIndirectSGPRIdx(const SIInstrInfo *TII, MachineRegisterInfo &MRI, MachineInstr &MI, int Offset)
static SDValue emitNonHSAIntrinsicError(SelectionDAG &DAG, const SDLoc &DL, EVT VT)
static EVT memVTFromLoadIntrData(const SITargetLowering &TLI, const DataLayout &DL, Type *Ty, unsigned MaxNumLanes)
static unsigned minMaxOpcToMin3Max3Opc(unsigned Opc)
static SDValue lowerBALLOTIntrinsic(const SITargetLowering &TLI, SDNode *N, SelectionDAG &DAG)
static SDValue buildSMovImm32(SelectionDAG &DAG, const SDLoc &DL, uint64_t Val)
static SDValue getBuildDwordsVector(SelectionDAG &DAG, SDLoc DL, ArrayRef< SDValue > Elts)
static SDNode * findUser(SDValue Value, unsigned Opcode)
Helper function for LowerBRCOND.
static unsigned addPermMasks(unsigned First, unsigned Second)
static uint64_t clearUnusedBits(uint64_t Val, unsigned Size)
static SDValue getFPTernOp(SelectionDAG &DAG, unsigned Opcode, const SDLoc &SL, EVT VT, SDValue A, SDValue B, SDValue C, SDValue GlueChain, SDNodeFlags Flags)
static SDValue emitRemovedIntrinsicError(SelectionDAG &DAG, const SDLoc &DL, EVT VT)
static SDValue getFPBinOp(SelectionDAG &DAG, unsigned Opcode, const SDLoc &SL, EVT VT, SDValue A, SDValue B, SDValue GlueChain, SDNodeFlags Flags)
static SDValue buildPCRelGlobalAddress(SelectionDAG &DAG, const GlobalValue *GV, const SDLoc &DL, int64_t Offset, EVT PtrVT, unsigned GAFlags=SIInstrInfo::MO_NONE)
static cl::opt< bool > UseDivergentRegisterIndexing("amdgpu-use-divergent-register-indexing", cl::Hidden, cl::desc("Use indirect register addressing for divergent indexes"), cl::init(false))
static const std::optional< ByteProvider< SDValue > > calculateSrcByte(const SDValue Op, uint64_t DestByte, uint64_t SrcIndex=0, unsigned Depth=0)
static void allocateSGPR64Input(CCState &CCInfo, ArgDescriptor &Arg)
SI DAG Lowering interface definition.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
Interface definition for SIRegisterInfo.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
static constexpr int Concat[]
static const AMDGPUFunctionArgInfo FixedABIFunctionInfo
void setFuncArgInfo(const Function &F, const AMDGPUFunctionArgInfo &ArgInfo)
static bool isUniformMMO(const MachineMemOperand *MMO)
static std::optional< uint32_t > getLDSKernelIdMetadata(const Function &F)
uint32_t getLDSSize() const
void setUsesDynamicLDS(bool DynLDS)
void setDynLDSAlign(const Function &F, const GlobalVariable &GV)
bool isEntryFunction() const
bool hasMadMacF32Insts() const
bool useRealTrue16Insts() const
Return true if real (non-fake) variants of True16 instructions using 16-bit registers should be code-...
unsigned getMaxWorkitemID(const Function &Kernel, unsigned Dimension) const
Return the maximum workitem ID value in the function, for the given (0, 1, 2) dimension.
bool hasMadMixInsts() const
unsigned getWavefrontSizeLog2() const
bool has16BitInsts() const
bool isAmdHsaOrMesa(const Function &F) const
bool hasFastFMAF32() const
bool hasTrigReducedRange() const
unsigned getWavefrontSize() const
bool hasInv2PiInlineImm() const
bool hasVOP3PInsts() const
static unsigned numBitsSigned(SDValue Op, SelectionDAG &DAG)
SDValue SplitVectorLoad(SDValue Op, SelectionDAG &DAG) const
Split a vector load into 2 loads of half the vector.
void analyzeFormalArgumentsCompute(CCState &State, const SmallVectorImpl< ISD::InputArg > &Ins) const
The SelectionDAGBuilder will automatically promote function arguments with illegal types.
SDValue storeStackInputValue(SelectionDAG &DAG, const SDLoc &SL, SDValue Chain, SDValue ArgVal, int64_t Offset) const
void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth=0) const override
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
SDValue splitBinaryBitConstantOpImpl(DAGCombinerInfo &DCI, const SDLoc &SL, unsigned Opc, SDValue LHS, uint32_t ValLo, uint32_t ValHi) const
Split the 64-bit value LHS into two 32-bit components, and perform the binary operation Opc to it wit...
SDValue lowerUnhandledCall(CallLoweringInfo &CLI, SmallVectorImpl< SDValue > &InVals, StringRef Reason) const
AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *) const override
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
This callback is invoked for operations that are unsupported by the target, which are registered to u...
SDValue addTokenForArgument(SDValue Chain, SelectionDAG &DAG, MachineFrameInfo &MFI, int ClobberedFI) const
static bool needsDenormHandlingF32(const SelectionDAG &DAG, SDValue Src, SDNodeFlags Flags)
uint32_t getImplicitParameterOffset(const MachineFunction &MF, const ImplicitParameter Param) const
Helper function that returns the byte offset of the given type of implicit parameter.
SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const
virtual SDValue LowerGlobalAddress(AMDGPUMachineFunction *MFI, SDValue Op, SelectionDAG &DAG) const
SDValue loadInputValue(SelectionDAG &DAG, const TargetRegisterClass *RC, EVT VT, const SDLoc &SL, const ArgDescriptor &Arg) const
static EVT getEquivalentMemType(LLVMContext &Context, EVT VT)
SDValue CreateLiveInRegister(SelectionDAG &DAG, const TargetRegisterClass *RC, Register Reg, EVT VT, const SDLoc &SL, bool RawReg=false) const
Helper function that adds Reg to the LiveIn list of the DAG's MachineFunction.
SDValue SplitVectorStore(SDValue Op, SelectionDAG &DAG) const
Split a vector store into 2 stores of half the vector.
std::pair< SDValue, SDValue > split64BitValue(SDValue Op, SelectionDAG &DAG) const
Return 64-bit value Op as two 32-bit integers.
static CCAssignFn * CCAssignFnForReturn(CallingConv::ID CC, bool IsVarArg)
static CCAssignFn * CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg)
Selects the correct CCAssignFn for a given CallingConvention value.
static bool allUsesHaveSourceMods(const SDNode *N, unsigned CostThreshold=4)
bool isKnownNeverNaNForTargetNode(SDValue Op, const SelectionDAG &DAG, bool SNaN=false, unsigned Depth=0) const override
If SNaN is false,.
static unsigned numBitsUnsigned(SDValue Op, SelectionDAG &DAG)
SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const
static bool allowApproxFunc(const SelectionDAG &DAG, SDNodeFlags Flags)
SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, const SmallVectorImpl< SDValue > &OutVals, const SDLoc &DL, SelectionDAG &DAG) const override
This hook must be implemented to lower outgoing return values, described by the Outs array,...
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
This callback is invoked when a node result type is illegal for the target, and the operation was reg...
SDValue performRcpCombine(SDNode *N, DAGCombinerInfo &DCI) const
static bool shouldFoldFNegIntoSrc(SDNode *FNeg, SDValue FNegSrc)
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
SDValue WidenOrSplitVectorLoad(SDValue Op, SelectionDAG &DAG) const
Widen a suitably aligned v3 load.
static APFloat getQNaN(const fltSemantics &Sem, bool Negative=false, const APInt *payload=nullptr)
Factory for QNaN values.
opStatus convert(const fltSemantics &ToSemantics, roundingMode RM, bool *losesInfo)
APInt bitcastToAPInt() const
static APFloat getLargest(const fltSemantics &Sem, bool Negative=false)
Returns the largest finite number in the given semantics.
static APFloat getInf(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative Infinity.
static APFloat getZero(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative Zero.
Class for arbitrary precision integers.
void setHighBits(unsigned hiBits)
Set the top hiBits bits.
static APInt getBitsSet(unsigned numBits, unsigned loBit, unsigned hiBit)
Get a value with a block of bits set.
bool isSignMask() const
Check if the APInt's value is returned by getSignMask.
unsigned countr_zero() const
Count the number of trailing zero bits.
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
bool sge(const APInt &RHS) const
Signed greater or equal comparison.
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
This class represents an incoming formal argument to a Function.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
bool empty() const
empty - Check if the array is empty.
An instruction that atomically checks whether a specified value is in a memory location,...
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
an instruction that atomically reads a memory location, combines it with another value,...
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
BinOp
This enumeration lists the possible modifications atomicrmw can make.
@ Min
*p = old <signed v ? old : v
@ Max
*p = old >signed v ? old : v
@ UMin
*p = old <unsigned v ? old : v
@ FMin
*p = minnum(old, v) minnum matches the behavior of llvm.minnum.
@ UMax
*p = old >unsigned v ? old : v
@ FMax
*p = maxnum(old, v) maxnum matches the behavior of llvm.maxnum.
Value * getPointerOperand()
void setOperation(BinOp Operation)
BinOp getOperation() const
SyncScope::ID getSyncScopeID() const
Returns the synchronization scope ID of this rmw instruction.
static StringRef getOperationName(BinOp Op)
AtomicOrdering getOrdering() const
Returns the ordering constraint of this rmw instruction.
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
This is an SDNode representing atomic operations.
bool isCompareAndSwap() const
Returns true if this SDNode represents cmpxchg atomic operation, false otherwise.
This class holds the attributes for a function, its return value, and its parameters.
MemoryEffects getMemoryEffects() const
Returns memory effects of the function.
LLVM Basic Block Representation.
const Function * getParent() const
Return the enclosing method, or null if none.
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
BasicBlock * splitBasicBlock(iterator I, const Twine &BBName="", bool Before=false)
Split the basic block into two basic blocks at the specified instruction.
A "pseudo-class" with methods for operating on BUILD_VECTORs.
Represents known origin of an individual byte in combine pattern.
static ByteProvider getConstantZero()
static ByteProvider getSrc(std::optional< ISelOp > Val, int64_t ByteOffset, int64_t VectorOffset)
std::optional< ISelOp > Src
CCState - This class holds information needed while lowering arguments and return values.
MachineFunction & getMachineFunction() const
unsigned getFirstUnallocated(ArrayRef< MCPhysReg > Regs) const
getFirstUnallocated - Return the index of the first unallocated register in the set,...
static bool resultsCompatible(CallingConv::ID CalleeCC, CallingConv::ID CallerCC, MachineFunction &MF, LLVMContext &C, const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn CalleeFn, CCAssignFn CallerFn)
Returns true if the results of the two calling conventions are compatible.
void AnalyzeCallResult(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
AnalyzeCallResult - Analyze the return values of a call, incorporating info about the passed values i...
MCRegister AllocateReg(MCPhysReg Reg)
AllocateReg - Attempt to allocate one register.
bool CheckReturn(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
CheckReturn - Analyze the return values of a function, returning true if the return can be performed ...
void AnalyzeReturn(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
AnalyzeReturn - Analyze the returned values of a return, incorporating info about the result values i...
int64_t AllocateStack(unsigned Size, Align Alignment)
AllocateStack - Allocate a chunk of stack space with the specified size and alignment.
void AnalyzeCallOperands(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
AnalyzeCallOperands - Analyze the outgoing arguments to a call, incorporating info about the passed v...
uint64_t getStackSize() const
Returns the size of the currently allocated portion of the stack.
bool isAllocated(MCRegister Reg) const
isAllocated - Return true if the specified register (or an alias) is allocated.
void AnalyzeFormalArguments(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
AnalyzeFormalArguments - Analyze an array of argument values, incorporating info about the formals in...
CCValAssign - Represent assignment of one arg/retval to a location.
Register getLocReg() const
LocInfo getLocInfo() const
int64_t getLocMemOffset() const
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
bool hasFnAttr(Attribute::AttrKind Kind) const
Determine whether this call has the given attribute.
bool isMustTailCall() const
Tests if this call site must be tail call optimized.
Value * getArgOperand(unsigned i) const
unsigned arg_size() const
This class represents a function call, abstracting a target machine's calling convention.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
bool isFPPredicate() const
bool isIntPredicate() const
const APFloat & getValueAPF() const
bool isExactlyValue(double V) const
We don't rely on operator== working on double values, as it returns true for things that are clearly ...
bool isNegative() const
Return true if the value is negative.
bool isInfinity() const
Return true if the value is an infinity.
This is the shared class of boolean and integer constants.
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
uint64_t getZExtValue() const
const APInt & getAPIntValue() const
This is an important base class in LLVM.
bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
This class represents an Operation in the Expression.
uint64_t getNumOperands() const
A parsed version of the target data layout string in and methods for querying it.
Align getABITypeAlign(Type *Ty) const
Returns the minimum ABI-required alignment for the specified type.
TypeSize getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment pad...
Diagnostic information for unsupported feature in backend.
Class to represent fixed width SIMD vectors.
unsigned getNumElements() const
FunctionLoweringInfo - This contains information that is global to a function that is used when lower...
Register DemoteRegister
DemoteRegister - if CanLowerReturn is false, DemoteRegister is a vreg allocated to hold a pointer to ...
const Value * getValueFromVirtualReg(Register Vreg)
This method is called from TargetLowerinInfo::isSDNodeSourceOfDivergence to get the Value correspondi...
Class to represent function types.
Type * getParamType(unsigned i) const
Parameter type accessors.
FunctionType * getFunctionType() const
Returns the FunctionType for me.
iterator_range< arg_iterator > args()
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
bool hasD16Images() const
bool hasAtomicDsPkAdd16Insts() const
bool hasImageStoreD16Bug() const
bool hasUsableDivScaleConditionOutput() const
Condition output from div_scale is usable.
bool hasUsableDSOffset() const
True if the offset field of DS instructions works as expected.
bool hasAtomicFMinFMaxF64FlatInsts() const
bool hasDot7Insts() const
bool hasApertureRegs() const
bool hasFlatInstOffsets() const
bool hasAtomicFMinFMaxF32FlatInsts() const
bool hasCompressedExport() const
Return true if the target's EXP instruction has the COMPR flag, which affects the meaning of the EN (...
bool hasGFX90AInsts() const
bool hasBCNT(unsigned Size) const
bool hasMultiDwordFlatScratchAddressing() const
bool hasArchitectedSGPRs() const
bool hasDenormModeInst() const
bool hasPrivEnabledTrap2NopBug() const
bool hasUnalignedDSAccessEnabled() const
const SIInstrInfo * getInstrInfo() const override
bool hasDot1Insts() const
bool hasAtomicFaddRtnInsts() const
Align getStackAlignment() const
bool hasScalarSubwordLoads() const
bool enableFlatScratch() const
bool hasDwordx3LoadStores() const
bool hasFlatScrRegister() const
bool supportsGetDoorbellID() const
bool hasFlatAtomicFaddF32Inst() const
bool hasKernargPreload() const
const SIRegisterInfo * getRegisterInfo() const override
unsigned getMaxNumVGPRs(unsigned WavesPerEU) const
bool hasLDSMisalignedBug() const
bool hasUserSGPRInit16Bug() const
TrapHandlerAbi getTrapHandlerAbi() const
const SIFrameLowering * getFrameLowering() const override
bool hasUnalignedScratchAccess() const
bool hasAtomicFMinFMaxF32GlobalInsts() const
bool hasRestrictedSOffset() const
bool hasMin3Max3_16() const
bool hasGFX10_AEncoding() const
bool hasPackedFP32Ops() const
bool hasGFX940Insts() const
bool hasFullRate64Ops() const
bool isTrapHandlerEnabled() const
bool hasLDSFPAtomicAddF64() const
bool hasFlatGlobalInsts() const
bool getScalarizeGlobalBehavior() const
bool hasScalarSMulU64() const
unsigned getKnownHighZeroBitsForFrameIndex() const
Return the number of high bits known to be zero for a frame index.
bool hasShaderCyclesHiLoRegisters() const
bool hasNSAEncoding() const
bool hasSMemRealTime() const
bool usePRTStrictNull() const
bool hasAtomicFMinFMaxF64GlobalInsts() const
bool hasAtomicFlatPkAdd16Insts() const
bool hasUnalignedBufferAccessEnabled() const
unsigned getMaxPrivateElementSize(bool ForBufferRSrc=false) const
bool hasImageGather4D16Bug() const
bool supportsMinMaxDenormModes() const
bool hasAtomicFaddInsts() const
bool hasAtomicBufferGlobalPkAddF16NoRtnInsts() const
bool hasAtomicBufferPkAddBF16Inst() const
bool hasAtomicFaddNoRtnInsts() const
bool hasFlatBufferGlobalAtomicFaddF64Inst() const
bool hasScalarDwordx3Loads() const
bool hasLDSFPAtomicAddF32() const
bool haveRoundOpsF64() const
Have v_trunc_f64, v_ceil_f64, v_rndne_f64.
bool hasDot8Insts() const
bool hasDS96AndDS128() const
bool useFlatForGlobal() const
Generation getGeneration() const
bool hasAtomicBufferGlobalPkAddF16Insts() const
bool hasScalarAddSub64() const
bool hasIEEEMinMax3() const
bool hasUnpackedD16VMem() const
bool hasAtomicGlobalPkAddBF16Inst() const
bool hasIEEEMinMax() const
bool hasFmaMixInsts() const
bool hasPackedTID() const
bool hasAddNoCarry() const
bool hasKernargSegmentPtr() const
bool hasDispatchID() const
bool hasPrivateSegmentBuffer() const
unsigned getNumFreeUserSGPRs()
bool hasImplicitBufferPtr() const
bool hasPrivateSegmentSize() const
bool hasDispatchPtr() const
bool hasFlatScratchInit() const
virtual void computeKnownBitsImpl(Register R, KnownBits &Known, const APInt &DemandedElts, unsigned Depth=0)
const MachineFunction & getMachineFunction() const
int64_t getOffset() const
unsigned getAddressSpace() const
const GlobalValue * getGlobal() const
bool hasExternalLinkage() const
unsigned getAddressSpace() const
Module * getParent()
Get the module that this global value is contained inside of...
Type * getValueType() const
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
bool hasMetadata() const
Return true if this instruction has any metadata attached to it.
const Function * getFunction() const
Return the function this instruction belongs to.
InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
void getAllMetadata(SmallVectorImpl< std::pair< unsigned, MDNode * > > &MDs) const
Get all metadata attached to this Instruction.
void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
Class to represent integer types.
A wrapper class for inspecting calls to intrinsic functions.
constexpr unsigned getScalarSizeInBits() const
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
static constexpr LLT pointer(unsigned AddressSpace, unsigned SizeInBits)
Get a low-level pointer in the given address space.
constexpr TypeSize getSizeInBits() const
Returns the total size of the type. Must only be called on sized types.
constexpr LLT changeElementSize(unsigned NewEltSize) const
If this type is a vector, return a vector with the same number of elements but the new element size.
This is an important class for using LLVM in a threaded context.
void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
SyncScope::ID getOrInsertSyncScopeID(StringRef SSN)
getOrInsertSyncScopeID - Maps synchronization scope name to synchronization scope ID.
An instruction for reading from memory.
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
void setAtomic(AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Sets the ordering constraint and the synchronization scope ID of this load instruction.
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
const SDValue & getOffset() const
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
LoopT * getParentLoop() const
Return the parent loop if it exists or nullptr for top level loops.
Describe properties that are true of each instruction in the target description file.
Wrapper class representing physical registers. Should be passed by value.
Helper class for constructing bundles of MachineInstrs.
MachineBasicBlock::instr_iterator begin() const
Return an iterator to the first bundled instruction.
uint64_t getScalarSizeInBits() const
bool bitsLE(MVT VT) const
Return true if this has no more bits than VT.
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
bool isScalableVector() const
Return true if this is a vector value type where the runtime length is machine dependent.
static MVT getVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
bool isPow2VectorType() const
Returns true if the given vector is a power of 2.
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
static MVT getVectorVT(MVT VT, unsigned NumElements)
static MVT getIntegerVT(unsigned BitWidth)
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
void removeSuccessor(MachineBasicBlock *Succ, bool NormalizeSuccProbs=false)
Remove successor from the successors list of this MachineBasicBlock.
MachineBasicBlock * splitAt(MachineInstr &SplitInst, bool UpdateLiveIns=true, LiveIntervals *LIS=nullptr)
Split a basic block into 2 pieces at SplitPoint.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
Align getAlignment() const
Return alignment of the basic block.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
bool hasCalls() const
Return true if the current function has any function calls.
void setHasTailCall(bool V=true)
void setReturnAddressIsTaken(bool s)
bool hasStackObjects() const
Return true if there are any stack objects in this function.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
DenormalMode getDenormalMode(const fltSemantics &FPType) const
Returns the denormal handling type for the default rounding mode of the function.
void push_back(MachineBasicBlock *MBB)
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
const LLVMTargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineInstr - Allocate a new MachineInstr.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
Representation of each machine instruction.
const MachineOperand & getOperand(unsigned i) const
A description of a memory reference used in the backend.
Flags
Flags values. These may be or'd together.
@ MOVolatile
The memory access is volatile.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
const MachinePointerInfo & getPointerInfo() const
Flags getFlags() const
Return the raw flags of the source value,.
AAMDNodes getAAInfo() const
Return the AA tags for the memory reference.
Align getBaseAlign() const
Return the minimum known alignment in bytes of the base address, without the offset.
MachineOperand class - Representation of each machine instruction operand.
unsigned getSubReg() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
void setReg(Register Reg)
Change the register this operand corresponds to.
static MachineOperand CreateImm(int64_t Val)
void setIsUndef(bool Val=true)
Register getReg() const
getReg - Returns the register number.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
void setType(Register VReg, LLT Ty)
Set the low-level type of VReg to Ty.
An SDNode that represents everything that will be needed to construct a MachineInstr.
This is an abstract virtual class for memory operations.
unsigned getAddressSpace() const
Return the address space for the associated pointer.
AAMDNodes getAAInfo() const
Returns the AA info that describes the dereference.
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
const MachinePointerInfo & getPointerInfo() const
const SDValue & getChain() const
EVT getMemoryVT() const
Return the type of the in-memory value.
bool onlyWritesMemory() const
Whether this function only (at most) writes memory.
bool doesNotAccessMemory() const
Whether this function accesses no memory.
bool onlyReadsMemory() const
Whether this function only (at most) reads memory.
A Module instance is used to store all the information related to an LLVM module.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
AnalysisType & getAnalysis() const
getAnalysis<AnalysisType>() - This function is used by subclasses to get to the analysis information ...
static PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
static PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
Wrapper class representing virtual and physical registers.
static Register index2VirtReg(unsigned Index)
Convert a 0-based index to a virtual register number.
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
const DebugLoc & getDebugLoc() const
This class provides iterator support for SDUse operands that use a specific SDNode.
Represents one node in the SelectionDAG.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
SDNodeFlags getFlags() const
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
unsigned getNumValues() const
Return the number of values defined/returned by this operator.
unsigned getMachineOpcode() const
This may only be called if isMachineOpcode returns true.
SDVTList getVTList() const
const SDValue & getOperand(unsigned Num) const
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
use_iterator use_begin() const
Provide iteration support to walk over all uses of an SDNode.
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
SDNode * getGluedNode() const
If this node has a glue operand, return the node to which the glue operand points.
static use_iterator use_end()
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
bool isMachineOpcode() const
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getMachineOpcode() const
unsigned getOpcode() const
static unsigned getMaxMUBUFImmOffset(const GCNSubtarget &ST)
static unsigned getDSShaderTypeValue(const MachineFunction &MF)
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
bool hasWorkGroupIDZ() const
SIModeRegisterDefaults getMode() const
std::tuple< const ArgDescriptor *, const TargetRegisterClass *, LLT > getPreloadedValue(AMDGPUFunctionArgInfo::PreloadedValue Value) const
unsigned getBytesInStackArgArea() const
const AMDGPUGWSResourcePseudoSourceValue * getGWSPSV(const AMDGPUTargetMachine &TM)
static unsigned getSubRegFromChannel(unsigned Channel, unsigned NumRegs=1)
static LLVM_READONLY const TargetRegisterClass * getSGPRClassForBitWidth(unsigned BitWidth)
static bool isVGPRClass(const TargetRegisterClass *RC)
static bool isSGPRClass(const TargetRegisterClass *RC)
static bool isAGPRClass(const TargetRegisterClass *RC)
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override
Return true if folding a constant offset with the given GlobalAddress is legal.
bool isTypeDesirableForOp(unsigned Op, EVT VT) const override
Return true if the target has native support for the specified value type and it is 'desirable' to us...
SDNode * PostISelFolding(MachineSDNode *N, SelectionDAG &DAG) const override
Fold the instructions after selecting them.
SDValue splitTernaryVectorOp(SDValue Op, SelectionDAG &DAG) const
MachineSDNode * wrapAddr64Rsrc(SelectionDAG &DAG, const SDLoc &DL, SDValue Ptr) const
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT VT) const override
Return true if an FMA operation is faster than a pair of fmul and fadd instructions.
SDValue lowerGET_ROUNDING(SDValue Op, SelectionDAG &DAG) const
bool checkForPhysRegDependency(SDNode *Def, SDNode *User, unsigned Op, const TargetRegisterInfo *TRI, const TargetInstrInfo *TII, unsigned &PhysReg, int &Cost) const override
Allows the target to handle physreg-carried dependency in target-specific way.
EVT getOptimalMemOpType(const MemOp &Op, const AttributeList &FuncAttributes) const override
Returns the target specific optimal type for load and store operations as a result of memset,...
bool requiresUniformRegister(MachineFunction &MF, const Value *V) const override
Allows target to decide about the register class of the specific value that is live outside the defin...
bool isFMADLegal(const SelectionDAG &DAG, const SDNode *N) const override
Returns true if be combined with to form an ISD::FMAD.
AtomicExpansionKind shouldExpandAtomicStoreInIR(StoreInst *SI) const override
Returns how the given (atomic) store should be expanded by the IR-level AtomicExpand pass into.
void bundleInstWithWaitcnt(MachineInstr &MI) const
Insert MI into a BUNDLE with an S_WAITCNT 0 immediately following it.
MVT getScalarShiftAmountTy(const DataLayout &, EVT) const override
Return the type to use for a scalar shift opcode, given the shifted amount type.
SDValue LowerCall(CallLoweringInfo &CLI, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower calls into the specified DAG.
MVT getPointerTy(const DataLayout &DL, unsigned AS) const override
Map address space 7 to MVT::v5i32 because that's its in-memory representation.
bool denormalsEnabledForType(const SelectionDAG &DAG, EVT VT) const
void insertCopiesSplitCSR(MachineBasicBlock *Entry, const SmallVectorImpl< MachineBasicBlock * > &Exits) const override
Insert explicit copies in entry and exit blocks.
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override
Return the ValueType of the result of SETCC operations.
SDNode * legalizeTargetIndependentNode(SDNode *Node, SelectionDAG &DAG) const
Legalize target independent instructions (e.g.
bool allowsMisalignedMemoryAccessesImpl(unsigned Size, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *IsFast=nullptr) const
TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(MVT VT) const override
Return the preferred vector type legalization action.
SDValue lowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const
const GCNSubtarget * getSubtarget() const
bool enableAggressiveFMAFusion(EVT VT) const override
Return true if target always benefits from combining into FMA for a given value type.
bool shouldEmitGOTReloc(const GlobalValue *GV) const
bool isCanonicalized(SelectionDAG &DAG, SDValue Op, unsigned MaxDepth=5) const
void CollectTargetIntrinsicOperands(const CallInst &I, SmallVectorImpl< SDValue > &Ops, SelectionDAG &DAG) const override
SDValue splitUnaryVectorOp(SDValue Op, SelectionDAG &DAG) const
SDValue lowerGET_FPENV(SDValue Op, SelectionDAG &DAG) const
void allocateSpecialInputSGPRs(CCState &CCInfo, MachineFunction &MF, const SIRegisterInfo &TRI, SIMachineFunctionInfo &Info) const
void allocateLDSKernelId(CCState &CCInfo, MachineFunction &MF, const SIRegisterInfo &TRI, SIMachineFunctionInfo &Info) const
SDValue LowerSTACKSAVE(SDValue Op, SelectionDAG &DAG) const
SDValue lowerDYNAMIC_STACKALLOCImpl(SDValue Op, SelectionDAG &DAG) const
bool isReassocProfitable(SelectionDAG &DAG, SDValue N0, SDValue N1) const override
void allocateHSAUserSGPRs(CCState &CCInfo, MachineFunction &MF, const SIRegisterInfo &TRI, SIMachineFunctionInfo &Info) const
ArrayRef< MCPhysReg > getRoundingControlRegisters() const override
Returns a 0 terminated array of rounding control registers that can be attached into strict FP call.
ConstraintType getConstraintType(StringRef Constraint) const override
Given a constraint, return the type of constraint it is for this target.
SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, const SmallVectorImpl< SDValue > &OutVals, const SDLoc &DL, SelectionDAG &DAG) const override
This hook must be implemented to lower outgoing return values, described by the Outs array,...
const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent) const override
Return the register class that should be used for the specified value type.
void AddMemOpInit(MachineInstr &MI) const
MachineMemOperand::Flags getTargetMMOFlags(const Instruction &I) const override
This callback is used to inspect load/store instructions and add target-specific MachineMemOperand fl...
bool isLegalGlobalAddressingMode(const AddrMode &AM) const
void computeKnownBitsForFrameIndex(int FrameIdx, KnownBits &Known, const MachineFunction &MF) const override
Determine which of the bits of FrameIndex FIOp are known to be 0.
bool shouldConvertConstantLoadToIntImm(const APInt &Imm, Type *Ty) const override
Return true if it is beneficial to convert a load of a constant to just the constant itself.
Align getPrefLoopAlignment(MachineLoop *ML) const override
Return the preferred loop alignment.
std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const override
Given a physical register constraint (e.g.
AtomicExpansionKind shouldExpandAtomicLoadInIR(LoadInst *LI) const override
Returns how the given (atomic) load should be expanded by the IR-level AtomicExpand pass.
bool getAsmOperandConstVal(SDValue Op, uint64_t &Val) const
bool isShuffleMaskLegal(ArrayRef< int >, EVT) const override
Targets can use this to indicate that they only support some VECTOR_SHUFFLE operations,...
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
This callback is invoked when a node result type is illegal for the target, and the operation was reg...
Register getRegisterByName(const char *RegName, LLT VT, const MachineFunction &MF) const override
Return the register ID of the name passed in.
void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const override
Lower the specified operand into the Ops vector.
LLT getPreferredShiftAmountTy(LLT Ty) const override
Return the preferred type to use for a shift opcode, given the shifted amount type is ShiftValueTy.
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I=nullptr) const override
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
bool isMemOpUniform(const SDNode *N) const
bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, LLVMContext &Context) const override
This hook should be implemented to check whether the return values described by the Outs array can fi...
SDValue lowerSET_FPENV(SDValue Op, SelectionDAG &DAG) const
void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth=0) const override
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
SDValue lowerSET_ROUNDING(SDValue Op, SelectionDAG &DAG) const
void allocateSpecialInputVGPRsFixed(CCState &CCInfo, MachineFunction &MF, const SIRegisterInfo &TRI, SIMachineFunctionInfo &Info) const
Allocate implicit function VGPR arguments in fixed registers.
LoadInst * lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const override
On some platforms, an AtomicRMW that never actually modifies the value (such as fetch_add of 0) can b...
MachineBasicBlock * emitGWSMemViolTestLoop(MachineInstr &MI, MachineBasicBlock *BB) const
bool checkAsmConstraintValA(SDValue Op, uint64_t Val, unsigned MaxSize=64) const
AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *) const override
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
bool shouldEmitFixup(const GlobalValue *GV) const
MachineBasicBlock * splitKillBlock(MachineInstr &MI, MachineBasicBlock *BB) const
bool hasMemSDNodeUser(SDNode *N) const
bool isSDNodeSourceOfDivergence(const SDNode *N, FunctionLoweringInfo *FLI, UniformityInfo *UA) const override
MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *BB) const override
This method should be implemented by targets that mark instructions with the 'usesCustomInserter' fla...
bool isEligibleForTailCallOptimization(SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, const SmallVectorImpl< SDValue > &OutVals, const SmallVectorImpl< ISD::InputArg > &Ins, SelectionDAG &DAG) const
bool isMemOpHasNoClobberedMemOperand(const SDNode *N) const
bool isLegalFlatAddressingMode(const AddrMode &AM, unsigned AddrSpace) const
SDValue LowerCallResult(SDValue Chain, SDValue InGlue, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl< ISD::InputArg > &Ins, const SDLoc &DL, SelectionDAG &DAG, SmallVectorImpl< SDValue > &InVals, bool isThisReturn, SDValue ThisVal) const
SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl< ISD::InputArg > &Ins, const SDLoc &DL, SelectionDAG &DAG, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower the incoming (formal) arguments, described by the Ins array,...
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
bool isKnownNeverNaNForTargetNode(SDValue Op, const SelectionDAG &DAG, bool SNaN=false, unsigned Depth=0) const override
If SNaN is false,.
AtomicExpansionKind shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override
Returns how the given atomic cmpxchg should be expanded by the IR-level AtomicExpand pass.
bool isFPExtFoldable(const SelectionDAG &DAG, unsigned Opcode, EVT DestVT, EVT SrcVT) const override
Return true if an fpext operation input to an Opcode operation is free (for instance,...
void AdjustInstrPostInstrSelection(MachineInstr &MI, SDNode *Node) const override
Assign the register class depending on the number of bits set in the writemask.
MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const override
Certain combinations of ABIs, Targets and features require that types are legal for some operations a...
void allocateSpecialInputVGPRs(CCState &CCInfo, MachineFunction &MF, const SIRegisterInfo &TRI, SIMachineFunctionInfo &Info) const
Allocate implicit function VGPR arguments at the end of allocated user arguments.
void finalizeLowering(MachineFunction &MF) const override
Execute target specific actions to finalize target lowering.
static bool isNonGlobalAddrSpace(unsigned AS)
MachineSDNode * buildRSRC(SelectionDAG &DAG, const SDLoc &DL, SDValue Ptr, uint32_t RsrcDword1, uint64_t RsrcDword2And3) const
Return a resource descriptor with the 'Add TID' bit enabled The TID (Thread ID) is multiplied by the ...
unsigned getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const override
Certain targets require unusual breakdowns of certain types.
bool mayBeEmittedAsTailCall(const CallInst *) const override
Return true if the target may be able emit the call instruction as a tail call.
void passSpecialInputs(CallLoweringInfo &CLI, CCState &CCInfo, const SIMachineFunctionInfo &Info, SmallVectorImpl< std::pair< unsigned, SDValue > > &RegsToPass, SmallVectorImpl< SDValue > &MemOpChains, SDValue Chain) const
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
This callback is invoked for operations that are unsupported by the target, which are registered to u...
bool checkAsmConstraintVal(SDValue Op, StringRef Constraint, uint64_t Val) const
void emitExpandAtomicRMW(AtomicRMWInst *AI) const override
Perform a atomicrmw expansion using a target-specific way.
static bool shouldExpandVectorDynExt(unsigned EltSize, unsigned NumElem, bool IsDivergentIdx, const GCNSubtarget *Subtarget)
Check if EXTRACT_VECTOR_ELT/INSERT_VECTOR_ELT (<n x e>, var-idx) should be expanded into a set of cmp...
bool shouldUseLDSConstAddress(const GlobalValue *GV) const
bool supportSplitCSR(MachineFunction *MF) const override
Return true if the target supports that a subset of CSRs for the given machine function is handled ex...
SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const
bool allowsMisalignedMemoryAccesses(LLT Ty, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *IsFast=nullptr) const override
LLT handling variant.
bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT, unsigned Index) const override
Return true if EXTRACT_SUBVECTOR is cheap for extracting this result type from this source type with ...
void computeKnownBitsForTargetInstr(GISelKnownBits &Analysis, Register R, KnownBits &Known, const APInt &DemandedElts, const MachineRegisterInfo &MRI, unsigned Depth=0) const override
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
bool canMergeStoresTo(unsigned AS, EVT MemVT, const MachineFunction &MF) const override
Returns if it's reasonable to merge stores to MemVT size.
SDValue lowerPREFETCH(SDValue Op, SelectionDAG &DAG) const
SITargetLowering(const TargetMachine &tm, const GCNSubtarget &STI)
bool isFreeAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override
Returns true if a cast from SrcAS to DestAS is "cheap", such that e.g.
bool shouldEmitPCReloc(const GlobalValue *GV) const
void initializeSplitCSR(MachineBasicBlock *Entry) const override
Perform necessary initialization to handle a subset of CSRs explicitly via copies.
void allocateSpecialEntryInputVGPRs(CCState &CCInfo, MachineFunction &MF, const SIRegisterInfo &TRI, SIMachineFunctionInfo &Info) const
void allocatePreloadKernArgSGPRs(CCState &CCInfo, SmallVectorImpl< CCValAssign > &ArgLocs, const SmallVectorImpl< ISD::InputArg > &Ins, MachineFunction &MF, const SIRegisterInfo &TRI, SIMachineFunctionInfo &Info) const
SDValue copyToM0(SelectionDAG &DAG, SDValue Chain, const SDLoc &DL, SDValue V) const
bool getTgtMemIntrinsic(IntrinsicInfo &, const CallInst &, MachineFunction &MF, unsigned IntrinsicID) const override
Given an intrinsic, checks if on the target the intrinsic will need to map to a MemIntrinsicNode (tou...
SDValue splitBinaryVectorOp(SDValue Op, SelectionDAG &DAG) const
bool getAddrModeArguments(IntrinsicInst *, SmallVectorImpl< Value * > &, Type *&) const override
CodeGenPrepare sinks address calculations into the same BB as Load/Store instructions reading the add...
unsigned getVectorTypeBreakdownForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT, unsigned &NumIntermediates, MVT &RegisterVT) const override
Certain targets such as MIPS require that some types such as vectors are always broken down into scal...
Align computeKnownAlignForTargetInstr(GISelKnownBits &Analysis, Register R, const MachineRegisterInfo &MRI, unsigned Depth=0) const override
Determine the known alignment for the pointer value R.
MVT getPointerMemTy(const DataLayout &DL, unsigned AS) const override
Similarly, the in-memory representation of a p7 is {p8, i32}, aka v8i32 when padding is added.
void allocateSystemSGPRs(CCState &CCInfo, MachineFunction &MF, SIMachineFunctionInfo &Info, CallingConv::ID CallConv, bool IsShader) const
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
SDValue getExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT, unsigned Opcode)
Convert Op, which must be of integer type, to the integer type VT, by either any/sign/zero-extending ...
const SDValue & getRoot() const
Return the root tag of the SelectionDAG.
bool isKnownNeverSNaN(SDValue Op, unsigned Depth=0) const
SDValue getAddrSpaceCast(const SDLoc &dl, EVT VT, SDValue Ptr, unsigned SrcAS, unsigned DestAS)
Return an AddrSpaceCastSDNode.
const Pass * getPass() const
SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
SDValue getShiftAmountConstant(uint64_t Val, EVT VT, const SDLoc &DL)
MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
void ExtractVectorElements(SDValue Op, SmallVectorImpl< SDValue > &Args, unsigned Start=0, unsigned Count=0, EVT EltVT=EVT())
Append the extracted elements from Start to Count out of the vector Op in Args.
SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool AlwaysInline, const CallInst *CI, std::optional< bool > OverrideTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo=AAMDNodes(), AAResults *AA=nullptr)
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
SDValue UnrollVectorOp(SDNode *N, unsigned ResNE=0)
Utility function used by legalize and lowering to "unroll" a vector operation by splitting out the sc...
SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
bool haveNoCommonBitsSet(SDValue A, SDValue B) const
Return true if A and B have no common bits set.
SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
SDValue getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT, SDValue Chain, SDValue Ptr, SDValue Val, MachineMemOperand *MMO)
Gets a node for an atomic op, produces result (if relevant) and chain and takes 2 operands.
std::pair< SDValue, SDValue > SplitVectorOperand(const SDNode *N, unsigned OpNo)
Split the node's operand with EXTRACT_SUBVECTOR and return the low/high part.
SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
const TargetLowering & getTargetLoweringInfo() const
std::pair< EVT, EVT > GetSplitDestVTs(const EVT &VT) const
Compute the VTs needed for the low/hi parts of a type which is split (or expanded) into two not neces...
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd).
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
SDValue getBitcastedAnyExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by first bitcasting (from potentia...
SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build Select's if you just have operands and don't want to check...
void setNodeMemRefs(MachineSDNode *N, ArrayRef< MachineMemOperand * > NewMemRefs)
Mutate the specified machine node's memory references to the provided list.
SDValue getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT)
Return the expression required to zero extend the Op value assuming it was the smaller SrcTy value.
const DataLayout & getDataLayout() const
SDValue getTokenFactor(const SDLoc &DL, SmallVectorImpl< SDValue > &Vals)
Creates a new TokenFactor containing Vals.
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using 'From' to use 'To' instead.
SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node, that starts new call frame, in which InSize bytes are set up inside ...
SDValue getRegister(unsigned Reg, EVT VT)
void RemoveDeadNode(SDNode *N)
Remove the specified node from the system.
SDValue getTargetExtractSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand)
A convenience function for creating TargetInstrInfo::EXTRACT_SUBREG nodes.
SDValue getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either sign-extending or trunca...
static const fltSemantics & EVTToAPFloatSemantics(EVT VT)
Returns an APFloat semantics tag appropriate for the given type.
const TargetMachine & getTarget() const
SDValue getAnyExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either any-extending or truncat...
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, unsigned Reg, SDValue N)
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond)
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
SDValue getValueType(EVT)
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
bool isKnownNeverNaN(SDValue Op, bool SNaN=false, unsigned Depth=0) const
Test whether the given SDValue (or all elements of it, if it is a vector) is known to never be NaN.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
bool isBaseWithConstantOffset(SDValue Op) const
Return true if the specified operand is an ISD::ADD with a ConstantSDNode on the right-hand side,...
SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, unsigned Reg, EVT VT)
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
SDValue getRegisterMask(const uint32_t *RegMask)
SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
SDValue getCondCode(ISD::CondCode Cond)
bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
SDValue getObjectPtrOffset(const SDLoc &SL, SDValue Ptr, TypeSize Offset)
Create an add instruction with appropriate flags when used for addressing some offset of an object.
LLVMContext * getContext() const
const SDValue & setRoot(SDValue N)
Set the current root tag of the SelectionDAG.
SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, LocationSize Size=0, const AAMDNodes &AAInfo=AAMDNodes())
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
SDNode * UpdateNodeOperands(SDNode *N, SDValue Op)
Mutate the specified node in-place to have the specified operands.
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
int getMaskElt(unsigned Idx) const
ArrayRef< int > getMask() const
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
iterator insert(iterator I, T &&Elt)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
This class is used to represent ISD::STORE nodes.
A wrapper around a string literal that serves as a proxy for constructing global tables of StringRefs...
StringRef - Represent a constant reference to a string, i.e.
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
constexpr size_t size() const
size - Get the string size.
constexpr const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
bool ends_with(StringRef Suffix) const
Check if this string ends with the given Suffix.
A switch()-like statement whose cases are string literals.
StringSwitch & Case(StringLiteral S, T Value)
Information about stack frame layout on the target.
Align getStackAlign() const
getStackAlignment - This method returns the number of bytes to which the stack pointer must be aligne...
StackDirection getStackGrowthDirection() const
getStackGrowthDirection - Return the direction the stack grows
TargetInstrInfo - Interface to description of machine instruction set.
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
virtual void finalizeLowering(MachineFunction &MF) const
Execute target specific actions to finalize target lowering.
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
const TargetMachine & getTargetMachine() const
virtual unsigned getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain targets require unusual breakdowns of certain types.
virtual MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain combinations of ABIs, Targets and features require that types are legal for some operations a...
LegalizeTypeAction
This enum indicates whether a types are legal for a target, and if not, what action should be used to...
void setHasExtractBitsInsn(bool hasExtractInsn=true)
Tells the code generator that the target has BitExtract instructions.
virtual TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(MVT VT) const
Return the preferred vector type legalization action.
virtual unsigned getVectorTypeBreakdownForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT, unsigned &NumIntermediates, MVT &RegisterVT) const
Certain targets such as MIPS require that some types such as vectors are always broken down into scal...
Register getStackPointerRegisterToSaveRestore() const
If a physical register, this specifies the register that llvm.savestack/llvm.restorestack should save...
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
virtual Align getPrefLoopAlignment(MachineLoop *ML=nullptr) const
Return the preferred loop alignment.
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
@ ZeroOrOneBooleanContent
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
void setStackPointerRegisterToSaveRestore(Register R)
If set to a physical register, this specifies the register that llvm.savestack/llvm....
void AddPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT)
If Opc/OrigVT is specified as being promoted, the promotion code defaults to trying a larger integer/...
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
bool allowsMemoryAccessForAlignment(LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const
This function returns true if the memory access is aligned or if the target allows this specific unal...
virtual MVT getPointerMemTy(const DataLayout &DL, uint32_t AS=0) const
Return the in-memory pointer type for the given address space, defaults to the pointer type from the ...
void setSchedulingPreference(Sched::Preference Pref)
Specify the target scheduling preference.
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
virtual void computeKnownBitsForFrameIndex(int FIOp, KnownBits &Known, const MachineFunction &MF) const
Determine which of the bits of FrameIndex FIOp are known to be 0.
SDValue scalarizeVectorStore(StoreSDNode *ST, SelectionDAG &DAG) const
std::vector< AsmOperandInfo > AsmOperandInfoVector
SDValue SimplifyMultipleUseDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, SelectionDAG &DAG, unsigned Depth=0) const
More limited version of SimplifyDemandedBits that can be used to "lookthrough" ops that don't contrib...
SDValue expandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG) const
Expands an unaligned store to 2 half-size stores for integer values, and possibly more for vectors.
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
bool parametersInCSRMatch(const MachineRegisterInfo &MRI, const uint32_t *CallerPreservedMask, const SmallVectorImpl< CCValAssign > &ArgLocs, const SmallVectorImpl< SDValue > &OutVals) const
Check whether parameters to a call that are passed in callee saved registers are the same as from the...
std::pair< SDValue, SDValue > expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const
Expands an unaligned load to 2 half-size loads for an integer, and possibly more for vectors.
virtual bool isTypeDesirableForOp(unsigned, EVT VT) const
Return true if the target has native support for the specified value type and it is 'desirable' to us...
std::pair< SDValue, SDValue > scalarizeVectorLoad(LoadSDNode *LD, SelectionDAG &DAG) const
Turn load of vector type into a load of the individual elements.
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Op.
virtual MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const
This method should be implemented by targets that mark instructions with the 'usesCustomInserter' fla...
virtual AsmOperandInfoVector ParseConstraints(const DataLayout &DL, const TargetRegisterInfo *TRI, const CallBase &Call) const
Split up the constraint string from the inline assembly value into the specific constraints and their...
virtual void ComputeConstraintToUse(AsmOperandInfo &OpInfo, SDValue Op, SelectionDAG *DAG=nullptr) const
Determines the constraint code and constraint type to use for the specific AsmOperandInfo,...
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
SDValue expandFMINNUM_FMAXNUM(SDNode *N, SelectionDAG &DAG) const
Expand fminnum/fmaxnum into fminnum_ieee/fmaxnum_ieee with quieted inputs.
Primary interface to the complete machine description for the target machine.
const Triple & getTargetTriple() const
bool shouldAssumeDSOLocal(const GlobalValue *GV) const
unsigned UnsafeFPMath
UnsafeFPMath - This flag is enabled when the -enable-unsafe-fp-math flag is specified on the command ...
unsigned GuaranteedTailCallOpt
GuaranteedTailCallOpt - This flag is enabled when -tailcallopt is specified on the commandline.
unsigned getID() const
Return the register class ID number.
MCRegister getRegister(unsigned i) const
Return the specified register in the class.
int getCopyCost() const
Return the cost of copying a value between two registers in this class.
iterator begin() const
begin/end - Return all of the registers in this class.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
Target - Wrapper for Target specific information.
Triple - Helper class for working with autoconf configuration names.
OSType getOS() const
Get the parsed operating system type of this triple.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
static constexpr TypeSize getFixed(ScalarTy ExactSize)
The instances of the Type class are immutable: once they are created, they are never changed.
static IntegerType * getInt32Ty(LLVMContext &C)
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
bool isBFloatTy() const
Return true if this is 'bfloat', a 16-bit bfloat type.
bool isHalfTy() const
Return true if this is 'half', a 16-bit IEEE fp type.
bool isFunctionTy() const
True if this is an instance of FunctionType.
bool isIntegerTy() const
True if this is an instance of IntegerType.
const fltSemantics & getFltSemantics() const
bool isVoidTy() const
Return true if this is 'void'.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
A Use represents the edge between a Value definition and its users.
Value * getOperand(unsigned i) const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
LLVMContext & getContext() const
All values hold a context through their type.
iterator_range< use_iterator > uses()
void takeName(Value *V)
Transfer the name from V to this value.
Type * getElementType() const
constexpr bool isZero() const
const ParentTy * getParent() const
self_iterator getIterator()
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ CONSTANT_ADDRESS_32BIT
Address space for 32-bit constant memory.
@ BUFFER_STRIDED_POINTER
Address space for 192-bit fat buffer pointers with an additional index.
@ REGION_ADDRESS
Address space for region memory. (GDS)
@ LOCAL_ADDRESS
Address space for local memory.
@ STREAMOUT_REGISTER
Internal address spaces. Can be freely renumbered.
@ CONSTANT_ADDRESS
Address space for constant memory (VTX2).
@ FLAT_ADDRESS
Address space for flat memory.
@ GLOBAL_ADDRESS
Address space for global memory (RAT0, VTX0).
@ BUFFER_FAT_POINTER
Address space for 160-bit buffer fat pointers.
@ PRIVATE_ADDRESS
Address space for private memory.
@ BUFFER_RESOURCE
Address space for 128-bit buffer resources.
@ BUFFER_ATOMIC_COND_SUB_U32
@ TBUFFER_LOAD_FORMAT_D16
@ TBUFFER_STORE_FORMAT_D16
@ BUFFER_STORE_FORMAT_D16
@ CLAMP
CLAMP value between 0.0 and 1.0.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr char SymbolName[]
Key for Kernel::Metadata::mSymbolName.
bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi)
LLVM_READONLY const MIMGG16MappingInfo * getMIMGG16MappingInfo(unsigned G)
LLVM_READONLY int getGlobalSaddrOp(uint16_t Opcode)
bool isInlinableLiteralFP16(int16_t Literal, bool HasInv2Pi)
int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding, unsigned VDataDwords, unsigned VAddrDwords)
bool shouldEmitConstantsToTextSection(const Triple &TT)
bool isFlatGlobalAddrSpace(unsigned AS)
LLVM_READONLY int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx)
const uint64_t FltRoundToHWConversionTable
bool isGFX12Plus(const MCSubtargetInfo &STI)
unsigned getNSAMaxSize(const MCSubtargetInfo &STI, bool HasSampler)
bool isEntryFunctionCC(CallingConv::ID CC)
LLVM_READNONE bool isKernel(CallingConv::ID CC)
bool isGFX11(const MCSubtargetInfo &STI)
bool isCompute(CallingConv::ID cc)
unsigned getAMDHSACodeObjectVersion(const Module &M)
bool isChainCC(CallingConv::ID CC)
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi)
bool isIntrinsicSourceOfDivergence(unsigned IntrID)
LLVM_READONLY bool hasNamedOperand(uint64_t Opcode, uint64_t NamedIdx)
LLVM_READNONE bool isInlinableIntLiteral(int64_t Literal)
Is this literal inlinable, and not one of the values intended for floating point values.
bool getMUBUFTfe(unsigned Opc)
bool isGFX11Plus(const MCSubtargetInfo &STI)
std::optional< unsigned > getInlineEncodingV2F16(uint32_t Literal)
bool isShader(CallingConv::ID cc)
bool isGFX10Plus(const MCSubtargetInfo &STI)
LLVM_READONLY int getVOPe64(uint16_t Opcode)
@ TowardZeroF32_TowardNegativeF64
std::optional< unsigned > getInlineEncodingV2I16(uint32_t Literal)
uint32_t decodeFltRoundToHWConversionTable(uint32_t FltRounds)
Read the hardware rounding mode equivalent of a AMDGPUFltRounds value.
bool isExtendedGlobalAddrSpace(unsigned AS)
LLVM_READONLY const MIMGDimInfo * getMIMGDimInfo(unsigned DimEnum)
std::optional< unsigned > getInlineEncodingV2BF16(uint32_t Literal)
LLVM_READONLY const MIMGBaseOpcodeInfo * getMIMGBaseOpcodeInfo(unsigned BaseOpcode)
int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels)
const ImageDimIntrinsicInfo * getImageDimIntrinsicInfo(unsigned Intr)
bool isInlinableLiteralI16(int32_t Literal, bool HasInv2Pi)
bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi)
Is this literal inlinable.
const RsrcIntrinsic * lookupRsrcIntrinsic(unsigned Intr)
bool isGraphics(CallingConv::ID cc)
const uint64_t FltRoundConversionTable
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ AMDGPU_CS
Used for Mesa/AMDPAL compute shaders.
@ AMDGPU_KERNEL
Used for AMDGPU code object kernels.
@ MaxID
The highest possible ID. Must be some 2^k - 1.
@ AMDGPU_Gfx
Used for AMD graphics targets.
@ AMDGPU_CS_ChainPreserve
Used on AMDGPUs to give the middle-end more control over argument placement.
@ AMDGPU_CS_Chain
Used on AMDGPUs to give the middle-end more control over argument placement.
@ AMDGPU_PS
Used for Mesa/AMDPAL pixel shaders.
@ Fast
Attempts to make calls as fast as possible (e.g. by passing things in registers).
@ C
The default llvm calling convention, compatible with C.
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
@ MERGE_VALUES
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual results.
@ DELETED_NODE
DELETED_NODE - This is an illegal value that is used to catch errors.
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
@ BSWAP
Byte Swap and Counting operators.
@ FMAD
FMAD - Perform a * b + c, while getting the same result as the separately rounded operations.
@ ADD
Simple integer binary arithmetic operators.
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic function with side effects that does not return a result.
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter) to floating point.
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
@ FADD
Simple binary floating point operators.
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
@ SIGN_EXTEND
Conversion operators.
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
@ CTTZ_ZERO_UNDEF
Bit counting operators with an undefined result for zero inputs.
@ FCANONICALIZE
Returns platform specific canonical encoding of a floating point number.
@ IS_FPCLASS
Performs a check of floating point class property, defined by IEEE-754.
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
@ UNDEF
UNDEF - An undefined node.
@ EXTRACT_ELEMENT
EXTRACT_ELEMENT - This is used to get the lower or upper (determined by a Constant,...
@ CopyFromReg
CopyFromReg - This node indicates that the input value is a virtual or physical register that is defined outside of the scope of this SelectionDAG.
@ GET_ROUNDING
Returns current rounding mode: -1 Undefined 0 Round to 0 1 Round to nearest, ties to even 2 Round to ...
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
@ SHL
Shift and rotation operations.
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
@ SMULO
Same for multiplication.
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in a large integer register (e.g. sign extending the low 8 bits of a 32-bit register to fill the top 24 bits with the sign bit of the low 8 bits).
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
@ UADDO_CARRY
Carry-using nodes for multiple precision addition and subtraction.
@ STRICT_FP_ROUND
X = STRICT_FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the destination VT.
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
@ STRICT_FP_EXTEND
X = STRICT_FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
@ AND
Bitwise operators - logical and, logical or, logical xor.
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the destination VT.
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero or sign extended from a narrower type.
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified, possibly variable, elements.
CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
StringRef getName(ID id)
Return the LLVM name for an intrinsic, such as "llvm.ppc.altivec.lvx".
AttributeList getAttributes(LLVMContext &C, ID id)
Return the attributes for an intrinsic.
GFCstOrSplatGFCstMatch m_GFCstOrSplat(std::optional< FPValueAndVReg > &FPValReg)
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Define
Register definition.
@ Kill
The last use of a register.
@ Undef
Value of the register doesn't matter.
@ System
Synchronized with respect to all concurrently executing threads.
initializer< Ty > init(const Ty &Val)
sandboxir::Value * getValue(llvm::Value *V) const
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
FunctionAddr VTableAddr Value
ISD::CondCode getICmpCondCode(ICmpInst::Predicate Pred)
getICmpCondCode - Return the ISD condition code corresponding to the given LLVM IR integer condition code.
void finalizeBundle(MachineBasicBlock &MBB, MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)
finalizeBundle - Finalize a machine instruction bundle which includes a sequence of instructions star...
int64_t maxIntN(int64_t N)
Gets the maximum value for a N-bit signed integer.
int popcount(T Value) noexcept
Count the number of set bits in a value.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
testing::Matcher< const detail::ErrorHolder & > Failed()
int bit_width(T Value)
Returns the number of bits needed to represent Value if Value is nonzero.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
constexpr T alignDown(U Value, V Align, W Skew=0)
Returns the largest unsigned integer less than or equal to Value and is Skew mod Align.
ConstantFPSDNode * isConstOrConstSplatFP(SDValue N, bool AllowUndefs=false)
Returns the SDNode if it is a constant splat BuildVector or constant float.
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
constexpr bool isShiftedMask_64(uint64_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (64 bit version).
bool isReleaseOrStronger(AtomicOrdering AO)
static const MachineMemOperand::Flags MONoClobber
Mark the MMO of a uniform load if there are no potentially clobbering stores on any path from the sta...
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
int countl_zero(T Val)
Count number of 0's from the most significant bit to the least stopping at the first 1.
bool isBoolSGPR(SDValue V)
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
T maskTrailingOnes(unsigned N)
Create a bitmask with the N right-most bits set to 1, and all other bits set to 0.
constexpr uint32_t Hi_32(uint64_t Value)
Return the high 32 bits of a 64 bit value.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
ISD::CondCode getFCmpCondCode(FCmpInst::Predicate Pred)
getFCmpCondCode - Return the ISD condition code corresponding to the given LLVM IR floating-point con...
constexpr uint32_t Lo_32(uint64_t Value)
Return the low 32 bits of a 64 bit value.
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type arguments.
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
FunctionAddr VTableAddr uintptr_t uintptr_t Data
unsigned getUndefRegState(bool B)
bool CCAssignFn(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
CCAssignFn - This function assigns a location for Val, updating State to reflect the change.
@ Or
Bitwise or logical OR of integers.
@ Mul
Product of integers.
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
FunctionAddr VTableAddr Next
DWARFExpression::Operation Op
@ TowardPositive
roundTowardPositive.
@ TowardNegative
roundTowardNegative.
unsigned M0(unsigned Val)
int64_t minIntN(int64_t N)
Gets the minimum value for a N-bit signed integer.
ArrayRef(const T &OneElt) -> ArrayRef< T >
ConstantSDNode * isConstOrConstSplat(SDValue N, bool AllowUndefs=false, bool AllowTruncation=false)
Returns the SDNode if it is a constant splat BuildVector or constant int.
constexpr unsigned BitWidth
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
static const MachineMemOperand::Flags MOLastUse
Mark the MMO of a load as the last use.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
ArgDescriptor WorkItemIDZ
ArgDescriptor WorkItemIDY
std::tuple< const ArgDescriptor *, const TargetRegisterClass *, LLT > getPreloadedValue(PreloadedValue Value) const
ArgDescriptor WorkItemIDX
static constexpr uint64_t encode(Fields... Values)
static std::tuple< typename Fields::ValueType... > decode(uint64_t Encoded)
static const fltSemantics & IEEEsingle() LLVM_READNONE
static constexpr roundingMode rmNearestTiesToEven
static const fltSemantics & IEEEhalf() LLVM_READNONE
This struct is a compact representation of a valid (non-zero power of two) alignment.
uint64_t value() const
This is a hole in the type system and should not be abused.
static ArgDescriptor createStack(unsigned Offset, unsigned Mask=~0u)
MCRegister getRegister() const
static ArgDescriptor createArg(const ArgDescriptor &Arg, unsigned Mask)
static ArgDescriptor createRegister(Register Reg, unsigned Mask=~0u)
Helper struct shared between Function Specialization and SCCP Solver.
Represent subnormal handling kind for floating point instruction inputs and outputs.
@ Dynamic
Denormals have unknown treatment.
static constexpr DenormalMode getPreserveSign()
static constexpr DenormalMode getIEEE()
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
EVT changeTypeToInteger() const
Return the type converted to an equivalently sized integer or vector with integer element type.
bool bitsLT(EVT VT) const
Return true if this has less bits than VT.
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
bool isByteSized() const
Return true if the bit size is a multiple of 8.
uint64_t getScalarSizeInBits() const
bool isPow2VectorType() const
Returns true if the given vector is a power of 2.
TypeSize getStoreSizeInBits() const
Return the number of bits overwritten by a store of the specified value type.
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
bool isVector() const
Return true if this is a vector value type.
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
bool bitsEq(EVT VT) const
Return true if this has the same number of bits as VT.
Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
EVT getVectorElementType() const
Given a vector type, return the type of each element.
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
bool isInteger() const
Return true if this is an integer or a vector integer type.
unsigned getPointerAddrSpace() const
unsigned getByValSize() const
bool isUnknown() const
Returns true if we don't know any bits.
void resetAll()
Resets the known state of all bits.
unsigned countMaxActiveBits() const
Returns the maximum number of bits needed to represent all possible unsigned values with these known ...
unsigned countMinLeadingZeros() const
Returns the minimum number of leading zero bits.
This class contains a discriminated union of information about pointers in memory operands,...
static MachinePointerInfo getStack(MachineFunction &MF, int64_t Offset, uint8_t ID=0)
Stack pointer relative access.
int64_t Offset
Offset - This is an offset from the base Value*.
PointerUnion< const Value *, const PseudoSourceValue * > V
This is the IR pointer value for the access, or it is null if unknown.
static MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
These are IR-level optimization flags that may be propagated to SDNodes.
bool hasNoUnsignedWrap() const
bool hasAllowContract() const
void setNoUnsignedWrap(bool b)
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
bool DX10Clamp
Used by the vector ALU to force DX10-style treatment of NaNs: when set, clamp NaN to zero; otherwise, pass NaN through.
DenormalMode FP64FP16Denormals
If this is set, neither input or output denormals are flushed for both f64 and f16/v2f16 instructions.
bool IEEE
Floating point opcodes that support exception flag gathering quiet and propagate signaling NaN inputs...
DenormalMode FP32Denormals
If this is set, neither input or output denormals are flushed for most f32 instructions.
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
This structure contains all information that is necessary for lowering calls.
SDValue ConvergenceControlToken
SmallVector< ISD::InputArg, 32 > Ins
SmallVector< ISD::OutputArg, 32 > Outs
SmallVector< SDValue, 32 > OutVals
bool isBeforeLegalize() const